[llvm] [mlir] [AMDGPU][Verifier] Check address space of `alloca` instruction (PR #135820)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 20 15:52:16 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/135820
From c9c1eefa7714fccc3661d79e690fc17945ab7fe1 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 20 Apr 2025 18:51:54 -0400
Subject: [PATCH] [AMDGPU][Verifier] Check address space of `alloca` instruction
---
llvm/lib/IR/Verifier.cpp | 5 +
.../AMDGPU/assert-wrong-alloca-addrspace.ll | 16 -
.../AMDGPU/lower-indirect-lds-references.ll | 5 +-
.../InstCombine/alloca-in-non-alloca-as.ll | 66 -
.../OpenMP/custom_state_machines.ll | 2149 ++++++------
.../OpenMP/custom_state_machines_pre_lto.ll | 2866 +++++++++--------
llvm/test/Transforms/OpenMP/spmdization.ll | 1331 ++++----
.../OpenMP/spmdization_constant_prop.ll | 253 +-
.../Transforms/OpenMP/spmdization_indirect.ll | 570 ++--
llvm/test/Verifier/AMDGPU/alloca.ll | 16 +
.../LLVMIR/omptarget-parallel-llvm.mlir | 5 +-
11 files changed, 4002 insertions(+), 3280 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
delete mode 100644 llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
create mode 100644 llvm/test/Verifier/AMDGPU/alloca.ll
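
Nearly all of the test churn below is one mechanical rewrite: each alloca in the
default address space becomes an alloca in addrspace(5) followed by an
addrspacecast back to a generic pointer, and the uses are redirected to the
cast. Schematically (an illustrative sketch, not a hunk from this patch;
@use is a placeholder callee):

  ; before
  %alloca = alloca ptr, align 8
  call void @use(ptr %alloca)

  ; after
  %alloca = alloca ptr, align 8, addrspace(5)
  %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
  call void @use(ptr %alloca.cast)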
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 8afe360d088bc..60209cfda00e6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4392,6 +4392,11 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
verifySwiftErrorValue(&AI);
}
+ if (TT.isAMDGPU()) {
+ Check(AI.getAddressSpace() == DL.getAllocaAddrSpace(),
+ "alloca on amdgpu must be in addrspace(5)", &AI);
+ }
+
visitInstruction(AI);
}
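
For context, an illustrative sketch (not taken from the patch; the newly added
llvm/test/Verifier/AMDGPU/alloca.ll is what actually exercises this check) of
IR the verifier now rejects versus accepts, assuming a datalayout whose alloca
address space is 5:

  target datalayout = "A5"
  target triple = "amdgcn-amd-amdhsa"

  define void @bad() {
    %p = alloca i32, align 4                        ; addrspace(0): rejected by the verifier
    ret void
  }

  define void @good() {
    %p = alloca i32, align 4, addrspace(5)          ; addrspace(5): accepted
    %q = addrspacecast ptr addrspace(5) %p to ptr   ; cast to a generic pointer where needed
    ret void
  }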
diff --git a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll b/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
deleted file mode 100644
index 1e72e679e83c0..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/assert-wrong-alloca-addrspace.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
-
-; The alloca has the wrong address space and is passed to a call. The
-; FrameIndex was created with the natural 32-bit pointer type instead
-; of the declared 64-bit. Make sure we don't assert.
-
-; CHECK: LLVM ERROR: Cannot select: {{.*}}: i64 = FrameIndex<0>
-
-declare void @func(ptr)
-
-define void @main() {
-bb:
- %alloca = alloca i32, align 4
- call void @func(ptr %alloca)
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll b/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll
index 1b0c8d66d3ebc..4309dacc9da2b 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-indirect-lds-references.ll
@@ -16,8 +16,9 @@ define amdgpu_kernel void @offloading_kernel() {
}
define void @call_unknown() {
- %1 = alloca ptr, align 8
- %2 = call i32 %1()
+ %alloca = alloca ptr, align 8, addrspace(5)
+ %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ %ret = call i32 %alloca.cast()
ret void
}
diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
deleted file mode 100644
index 9a2bfac0feb02..0000000000000
--- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-
-; Gracefully handle the alloca that is not in the alloca AS (=5)
-
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
-target triple = "amdgcn-amd-amdhsa"
-
-declare void @use(ptr)
-declare void @use2(ptr, ptr)
-
-define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(ptr %a) {
-; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8(
-; CHECK-NEXT: .master:
-; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 1
-; CHECK-NEXT: call void @use2(ptr nonnull [[TMP0]], ptr nonnull [[TMP0]])
-; CHECK-NEXT: ret void
-;
-.master:
- %0 = alloca i8, i64 8, align 1
- store ptr undef, ptr %0, align 8
- call void @use2(ptr %0, ptr %0)
- ret void
-}
-
-%struct.widget = type { [8 x i8] }
-
-define void @spam(ptr %arg1) {
-; CHECK-LABEL: @spam(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16
-; CHECK-NEXT: call void @zot(ptr nonnull [[ALLOCA1]])
-; CHECK-NEXT: ret void
-;
-bb:
- %alloca = alloca [30 x %struct.widget], i32 0, align 16
- call void @zot(ptr %alloca)
- ret void
-}
-
-define i1 @alloca_addrspace_0_nonnull() {
-; CHECK-LABEL: @alloca_addrspace_0_nonnull(
-; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1
-; CHECK-NEXT: call void @use(ptr nonnull [[ALLOCA]])
-; CHECK-NEXT: ret i1 true
-;
- %alloca = alloca i8
- call void @use(ptr %alloca)
- %cmp = icmp ne ptr %alloca, null
- ret i1 %cmp
-}
-
-define i1 @alloca_addrspace_5_nonnull() {
-; CHECK-LABEL: @alloca_addrspace_5_nonnull(
-; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
-; CHECK-NEXT: call void @use(ptr addrspace(5) [[ALLOCA]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspace(5) [[ALLOCA]], null
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %alloca = alloca i8, addrspace(5)
- call void @use(ptr addrspace(5) %alloca)
- %cmp = icmp ne ptr addrspace(5) %alloca, null
- ret i1 %cmp
-}
-
-declare hidden void @zot(ptr)
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 10e521bbfcc10..2fe28daf304a6 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -128,7 +128,6 @@
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
@G = external global i32, align 4
@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, ptr @0 }, align 8
-
@__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@@ -138,19 +137,22 @@
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -158,22 +160,25 @@ worker.exit: ; preds = %entry
ret void
}
-; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr, ptr) {
+define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
ret i32 0
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @no_parallel_region_in_here() #7
call void @unknown_no_openmp() #8
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @no_parallel_region_in_here() #1 {
entry:
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
@@ -191,25 +196,30 @@ omp_if.end: ; preds = %omp_if.then, %entry
ret void
}
+; Function Attrs: convergent
declare void @unknown_no_openmp() #2
+; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__1(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -217,46 +227,60 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @no_parallel_region_in_here() #7
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent
declare void @p0() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__2(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
@@ -264,45 +288,57 @@ declare void @__kmpc_get_shared_variables(ptr)
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent
declare void @p1() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__3(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__4(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -310,76 +346,95 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
call void @simple_state_machine_interprocedural_before() #7
call void @no_parallel_region_in_here() #7
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @simple_state_machine_interprocedural_after() #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_before() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__5(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_after() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__6(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -387,85 +442,110 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%call = call i32 @unknown() #7
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__7(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent
declare i32 @unknown() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__8(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__9(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -473,83 +553,107 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @unknown_no_openmp() #8
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__10(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__11(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__12(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -557,86 +661,111 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @unknown_pure() #9
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__13(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent nounwind willreturn memory(read)
declare void @unknown_pure() #5
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__14(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__15(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -644,22 +773,27 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
%call = call i32 @omp_get_thread_num() #7
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
entry:
- %a.addr = alloca i32, align 4
- store i32 %a, ptr %a.addr, align 4
- %0 = load i32, ptr %a.addr, align 4
+ %a.addr = alloca ptr, align 8, addrspace(5)
+ %a.addr.cast = addrspacecast ptr addrspace(5) %a.addr to ptr
+ store i32 %a, ptr %a.addr.cast, align 4
+ %0 = load i32, ptr %a.addr.cast, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
@@ -667,7 +801,7 @@ if.then: ; preds = %entry
br label %return
if.end: ; preds = %entry
- %1 = load i32, ptr %a.addr, align 4
+ %1 = load i32, ptr %a.addr.cast, align 4
%sub = sub nsw i32 %1, 1
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
@@ -677,21 +811,25 @@ return: ; preds = %if.end, %if.then
ret void
}
+; Function Attrs: convergent
declare i32 @omp_get_thread_num(...) #4
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__16(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -699,120 +837,153 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @weak_callee_empty() #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define weak hidden void @weak_callee_empty() #1 {
entry:
ret void
}
+; Function Attrs: convergent nounwind
declare i32 @__kmpc_single(ptr, i32) #6
+; Function Attrs: convergent nounwind
declare void @__kmpc_end_single(ptr, i32) #6
+; Function Attrs: convergent nounwind
declare void @__kmpc_barrier(ptr, i32) #6
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__17(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__18(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__19(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
-attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #3 = { nounwind }
attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+attributes #5 = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #6 = { convergent nounwind }
attributes #7 = { convergent }
attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
-attributes #9 = { convergent nounwind readonly willreturn }
+attributes #9 = { convergent nounwind willreturn memory(read) }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
-!llvm.module.flags = !{!16, !17, !18}
+!llvm.module.flags = !{!8, !9, !10}
!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -822,9 +993,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!16 = !{i32 1, !"wchar_size", i32 4}
-!17 = !{i32 7, !"openmp", i32 50}
-!18 = !{i32 7, !"openmp-device", i32 50}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{i32 7, !"openmp", i32 50}
+!10 = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
@@ -904,14 +1075,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -927,8 +1100,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU-NEXT: ret void
@@ -973,8 +1144,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1019,7 +1192,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1030,13 +1203,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1044,8 +1218,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU-NEXT: ret void
;
@@ -1054,12 +1226,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1067,8 +1241,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1077,12 +1249,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1091,8 +1265,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1143,7 +1319,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1154,12 +1330,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; AMDGPU-NEXT: ret void
;
@@ -1168,9 +1344,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1178,9 +1355,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; AMDGPU-SAME: () #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1188,8 +1366,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1198,12 +1374,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1211,9 +1389,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; AMDGPU-SAME: () #[[ATTR6]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1221,9 +1400,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; AMDGPU-SAME: () #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1232,8 +1412,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1280,7 +1462,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1291,12 +1473,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1304,8 +1487,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1314,12 +1495,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1327,8 +1510,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1337,12 +1518,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1351,8 +1534,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1397,7 +1582,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1408,12 +1593,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1421,8 +1607,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1431,12 +1615,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1444,8 +1630,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1454,12 +1638,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1468,8 +1654,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1514,7 +1702,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1525,12 +1713,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1538,8 +1727,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1548,12 +1735,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1561,8 +1750,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1571,12 +1758,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1584,14 +1773,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1602,8 +1793,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU-NEXT: ret void
@@ -1613,15 +1802,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU: if.then:
; AMDGPU-NEXT: br label [[RETURN:%.*]]
; AMDGPU: if.end:
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -1634,15 +1823,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU: if.then:
; AMDGPU-NEXT: br label [[RETURN:%.*]]
; AMDGPU: if.end:
-; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -1656,8 +1846,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1692,7 +1884,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: ret void
; AMDGPU: worker.exit:
@@ -1703,8 +1895,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU-NEXT: ret void
;
@@ -1720,8 +1910,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1730,12 +1918,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1743,8 +1933,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1753,12 +1941,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1766,9 +1956,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; AMDGPU-SAME: () #[[ATTR6]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1776,9 +1967,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; AMDGPU-SAME: () #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: ret void
;
;
@@ -1786,8 +1978,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-NEXT: ret void
;
@@ -1796,12 +1986,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-NEXT: ret void
;
;
@@ -1809,14 +2001,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -1832,8 +2026,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX-NEXT: ret void
@@ -1878,8 +2070,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1923,7 +2117,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -1934,13 +2128,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -1948,8 +2143,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX-NEXT: ret void
;
@@ -1958,12 +2151,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -1971,8 +2166,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -1981,12 +2174,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -1995,8 +2190,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2046,7 +2243,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2057,12 +2254,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; NVPTX-NEXT: ret void
;
@@ -2071,9 +2268,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; NVPTX-SAME: () #[[ATTR6:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2081,9 +2279,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; NVPTX-SAME: () #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2091,8 +2290,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2101,12 +2298,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2114,9 +2313,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; NVPTX-SAME: () #[[ATTR6]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2124,9 +2324,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; NVPTX-SAME: () #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2135,8 +2336,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2182,7 +2385,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2193,12 +2396,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2206,8 +2410,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2216,12 +2418,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2229,8 +2433,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2239,12 +2441,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2253,8 +2457,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2298,7 +2504,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2309,12 +2515,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2322,8 +2529,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2332,12 +2537,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2345,8 +2552,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2355,12 +2560,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2369,8 +2576,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2414,7 +2623,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2425,12 +2634,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2438,8 +2648,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2448,12 +2656,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2461,8 +2671,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2471,12 +2679,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2484,14 +2694,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2502,8 +2714,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX-NEXT: ret void
@@ -2513,15 +2723,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX: if.then:
; NVPTX-NEXT: br label [[RETURN:%.*]]
; NVPTX: if.end:
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -2534,15 +2744,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX: if.then:
; NVPTX-NEXT: br label [[RETURN:%.*]]
; NVPTX: if.end:
-; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -2556,8 +2767,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2591,7 +2804,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: ret void
; NVPTX: worker.exit:
@@ -2602,8 +2815,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX-NEXT: ret void
;
@@ -2619,8 +2830,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2629,12 +2838,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2642,8 +2853,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2652,12 +2861,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2665,9 +2876,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; NVPTX-SAME: () #[[ATTR6]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2675,9 +2887,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; NVPTX-SAME: () #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: ret void
;
;
@@ -2685,8 +2898,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-NEXT: ret void
;
@@ -2695,12 +2906,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-NEXT: ret void
;
;
@@ -2708,14 +2921,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2731,8 +2946,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -2776,14 +2989,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2794,13 +3009,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2808,8 +3024,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2818,12 +3032,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2831,8 +3047,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2841,12 +3055,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2854,14 +3070,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2872,12 +3090,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2886,9 +3104,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2896,9 +3115,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2906,8 +3126,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2916,12 +3134,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2929,9 +3149,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2939,9 +3160,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2949,14 +3171,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -2967,12 +3191,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -2980,8 +3205,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -2990,12 +3213,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3003,8 +3228,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3013,12 +3236,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3026,14 +3251,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3044,12 +3271,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3057,8 +3285,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3067,12 +3293,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3080,8 +3308,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3090,12 +3316,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3103,14 +3331,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3121,12 +3351,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3134,8 +3365,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3144,12 +3373,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3157,8 +3388,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3167,12 +3396,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3180,14 +3411,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3198,8 +3431,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: ret void
@@ -3209,15 +3440,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU-DISABLED: if.then:
; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]]
; AMDGPU-DISABLED: if.end:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -3230,15 +3461,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU-DISABLED: if.then:
; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]]
; AMDGPU-DISABLED: if.end:
-; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -3251,14 +3483,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; AMDGPU-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker.exit:
@@ -3269,8 +3503,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3286,8 +3518,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3296,12 +3526,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3309,8 +3541,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3319,12 +3549,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3332,9 +3564,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3342,9 +3575,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3352,8 +3586,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU-DISABLED-NEXT: ret void
;
@@ -3362,12 +3594,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
-; AMDGPU-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT: ret void
;
;
@@ -3375,14 +3609,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3398,8 +3634,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3443,14 +3677,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3461,13 +3697,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3475,8 +3712,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3485,12 +3720,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3498,8 +3735,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3508,12 +3743,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3521,14 +3758,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3539,12 +3778,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3553,9 +3792,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; NVPTX-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3563,9 +3803,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3573,8 +3814,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3583,12 +3822,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3596,9 +3837,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3606,9 +3848,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3616,14 +3859,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3634,12 +3879,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3647,8 +3893,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3657,12 +3901,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3670,8 +3916,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3680,12 +3924,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3693,14 +3939,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3711,12 +3959,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3724,8 +3973,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3734,12 +3981,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3747,8 +3996,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3757,12 +4004,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3770,14 +4019,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3788,12 +4039,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3801,8 +4053,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3811,12 +4061,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3824,8 +4076,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3834,12 +4084,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3847,14 +4099,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3865,8 +4119,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX-DISABLED-NEXT: ret void
@@ -3876,15 +4128,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX-DISABLED: if.then:
; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]]
; NVPTX-DISABLED: if.end:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -3897,15 +4149,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX-DISABLED: if.then:
; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]]
; NVPTX-DISABLED: if.end:
-; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -3918,14 +4171,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; NVPTX-DISABLED-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker.exit:
@@ -3936,8 +4191,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3953,8 +4206,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3963,12 +4214,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3976,8 +4229,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -3986,12 +4237,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -3999,9 +4252,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -4009,9 +4263,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED-NEXT: ret void
;
;
@@ -4019,8 +4274,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19
; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR11]]
; NVPTX-DISABLED-NEXT: ret void
;
@@ -4029,12 +4282,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
-; NVPTX-DISABLED-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT: ret void
;
;.
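
The regenerated check lines above all encode the same mechanical rewrite: each
stack slot is allocated in addrspace(5) and immediately cast back to a flat
pointer, so every pre-existing use is left untouched. A minimal before/after
sketch, taken from the kernel prologue updated in
custom_state_machines_pre_lto.ll below:

  ; before: flat alloca
  %.zero.addr = alloca i32, align 4
  store i32 0, ptr %.zero.addr, align 4

  ; after: alloca in addrspace(5), with a single addrspacecast that all
  ; existing uses route through
  %.zero.addr = alloca ptr, align 8, addrspace(5)
  %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
  store i32 0, ptr %.zero.addr.cast, align 4
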
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index 9576ff6ca6aee..344ee74036744 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -139,19 +139,22 @@
@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_needed_l14(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -159,20 +162,23 @@ worker.exit: ; preds = %entry
ret void
}
-; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO.
-declare i32 @__kmpc_target_init(ptr);
+declare i32 @__kmpc_target_init(ptr)
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @no_parallel_region_in_here() #7
call void @unknown_no_openmp() #8
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @no_parallel_region_in_here() #1 {
entry:
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
@@ -190,25 +196,30 @@ omp_if.end: ; preds = %omp_if.then, %entry
ret void
}
+; Function Attrs: convergent
declare void @unknown_no_openmp() #2
+; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_l22(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__1(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__1(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -216,46 +227,60 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @no_parallel_region_in_here() #7
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent
declare void @p0() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__2(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
@@ -263,45 +288,57 @@ declare void @__kmpc_get_shared_variables(ptr)
declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent
declare void @p1() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__3(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__4(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -309,76 +346,95 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
call void @simple_state_machine_interprocedural_before() #7
call void @no_parallel_region_in_here() #7
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @simple_state_machine_interprocedural_after() #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_before() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__5(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_after() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__6(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -386,85 +442,110 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%call = call i32 @unknown() #7
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__7(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent
declare i32 @unknown() #4
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__8(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__8(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__9(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__9(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -472,83 +553,107 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @unknown_no_openmp() #8
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__10(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__10(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__10(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__11(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__11(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__11(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_pure_l77(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__12(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__12(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -556,86 +661,111 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__12(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- %captured_vars_addrs = alloca [0 x ptr], align 8
- %captured_vars_addrs1 = alloca [0 x ptr], align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
+ %captured_vars_addrs1 = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs1.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs1 to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @unknown_no_openmp() #8
- %0 = load ptr, ptr %.global_tid..addr, align 8
+ %0 = load ptr, ptr %.global_tid..addr.cast, align 8
%1 = load i32, ptr %0, align 4
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @unknown_pure() #9
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr %captured_vars_addrs1.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__13(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__13(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__13(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent nounwind willreturn memory(read)
declare void @unknown_pure() #5
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__14(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p1() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__14(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__14(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__15(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__15(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -643,22 +773,27 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__15(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
%call = call i32 @omp_get_thread_num() #7
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
entry:
- %a.addr = alloca i32, align 4
- store i32 %a, ptr %a.addr, align 4
- %0 = load i32, ptr %a.addr, align 4
+ %a.addr = alloca ptr, align 8, addrspace(5)
+ %a.addr.cast = addrspacecast ptr addrspace(5) %a.addr to ptr
+ store i32 %a, ptr %a.addr.cast, align 4
+ %0 = load i32, ptr %a.addr.cast, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
@@ -666,7 +801,7 @@ if.then: ; preds = %entry
br label %return
if.end: ; preds = %entry
- %1 = load i32, ptr %a.addr, align 4
+ %1 = load i32, ptr %a.addr.cast, align 4
%sub = sub nsw i32 %1, 1
call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #7
call void @simple_state_machine_interprocedural_nested_recursive_after_after() #7
@@ -676,21 +811,25 @@ return: ; preds = %if.end, %if.then
ret void
}
+; Function Attrs: convergent
declare i32 @omp_get_thread_num(...) #4
+; Function Attrs: convergent noinline norecurse nounwind
define weak ptx_kernel void @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112(ptr %dyn) #0 {
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
- store i32 0, ptr %.zero.addr, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %worker.exit
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 %1, ptr %.threadid_temp., align 4
- call void @__omp_outlined__16(ptr %.threadid_temp., ptr %.zero.addr) #3
+ store i32 %1, ptr %.threadid_temp..cast, align 4
+ call void @__omp_outlined__16(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
ret void
@@ -698,120 +837,153 @@ worker.exit: ; preds = %entry
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__16(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @weak_callee_empty() #7
ret void
}
+; Function Attrs: convergent noinline nounwind
define weak hidden void @weak_callee_empty() #1 {
entry:
ret void
}
+; Function Attrs: convergent nounwind
declare i32 @__kmpc_single(ptr, i32) #6
+; Function Attrs: convergent nounwind
declare void @__kmpc_end_single(ptr, i32) #6
+; Function Attrs: convergent nounwind
declare void @__kmpc_barrier(ptr, i32) #6
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__17(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__17(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__17(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__18(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__18(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__18(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
+; Function Attrs: convergent noinline nounwind
define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_global_thread_num(ptr @2)
- call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr %captured_vars_addrs.cast, i64 0)
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__19(ptr noalias %.global_tid., ptr noalias %.bound_tid.) #0 {
entry:
- %.global_tid..addr = alloca ptr, align 8
- %.bound_tid..addr = alloca ptr, align 8
- store ptr %.global_tid., ptr %.global_tid..addr, align 8
- store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
+ %.global_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.global_tid..addr.cast = addrspacecast ptr addrspace(5) %.global_tid..addr to ptr
+ %.bound_tid..addr = alloca ptr, align 8, addrspace(5)
+ %.bound_tid..addr.cast = addrspacecast ptr addrspace(5) %.bound_tid..addr to ptr
+ store ptr %.global_tid., ptr %.global_tid..addr.cast, align 8
+ store ptr %.bound_tid., ptr %.bound_tid..addr.cast, align 8
call void @p0() #7
ret void
}
+; Function Attrs: convergent noinline norecurse nounwind
define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #0 {
entry:
- %.addr = alloca i16, align 2
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 0, ptr %.zero.addr, align 4
- store i16 %0, ptr %.addr, align 2
- store i32 %1, ptr %.addr1, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__19(ptr %.addr1, ptr %.zero.addr) #3
+ %.addr = alloca ptr, align 8, addrspace(5)
+ %.addr.cast = addrspacecast ptr addrspace(5) %.addr to ptr
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i16 %0, ptr %.addr.cast, align 2
+ store i32 %1, ptr %.addr1.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__19(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
-attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #1 = { convergent noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #2 = { convergent "frame-pointer"="none" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #3 = { nounwind }
attributes #4 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
-attributes #5 = { convergent nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
+attributes #5 = { convergent nounwind willreturn memory(read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" }
attributes #6 = { convergent nounwind }
attributes #7 = { convergent }
attributes #8 = { convergent "llvm.assume"="omp_no_openmp" }
-attributes #9 = { convergent nounwind readonly willreturn }
+attributes #9 = { convergent nounwind willreturn memory(read) }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
-!llvm.module.flags = !{!16, !17, !18}
+!llvm.module.flags = !{!8, !9, !10}
!0 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural", i32 39, i32 2}
!1 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_no_openmp_attr", i32 66, i32 4}
@@ -821,9 +993,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
!5 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine_interprocedural_nested_recursive", i32 92, i32 6}
!6 = !{i32 0, i32 20, i32 171331627, !"no_state_machine_weak_callee", i32 112, i32 7}
!7 = !{i32 0, i32 20, i32 171331627, !"simple_state_machine", i32 22, i32 1}
-!16 = !{i32 1, !"wchar_size", i32 4}
-!17 = !{i32 7, !"openmp", i32 50}
-!18 = !{i32 7, !"openmp-device", i32 50}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{i32 7, !"openmp", i32 50}
+!10 = !{i32 7, !"openmp-device", i32 50}
;.
; AMDGPU1: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU1: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
@@ -913,14 +1085,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -931,8 +1105,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU1-NEXT: ret void
@@ -976,14 +1148,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -994,13 +1168,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1008,8 +1183,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU1-NEXT: ret void
;
@@ -1018,12 +1191,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1031,8 +1206,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1041,12 +1214,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1054,14 +1229,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1072,12 +1249,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; AMDGPU1-NEXT: ret void
;
@@ -1086,9 +1263,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; AMDGPU1-SAME: () #[[ATTR6:[0-9]+]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1096,9 +1274,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; AMDGPU1-SAME: () #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1106,8 +1285,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1116,12 +1293,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1129,9 +1308,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; AMDGPU1-SAME: () #[[ATTR6]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1139,9 +1319,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; AMDGPU1-SAME: () #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1149,14 +1330,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1167,12 +1350,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1180,8 +1364,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1190,12 +1372,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1203,8 +1387,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1213,12 +1395,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1226,14 +1410,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1244,12 +1430,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1257,8 +1444,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1267,12 +1452,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1280,8 +1467,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1290,12 +1475,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1303,14 +1490,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1321,12 +1510,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1334,8 +1524,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1344,12 +1532,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1357,8 +1547,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1367,12 +1555,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1380,14 +1570,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1398,8 +1590,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__15
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU1-NEXT: ret void
@@ -1409,15 +1599,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU1: if.then:
; AMDGPU1-NEXT: br label [[RETURN:%.*]]
; AMDGPU1: if.end:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -1430,15 +1620,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; AMDGPU1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; AMDGPU1-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; AMDGPU1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU1: if.then:
; AMDGPU1-NEXT: br label [[RETURN:%.*]]
; AMDGPU1: if.end:
-; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; AMDGPU1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -1451,14 +1642,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; AMDGPU1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; AMDGPU1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU1: user_code.entry:
; AMDGPU1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU1-NEXT: ret void
; AMDGPU1: worker.exit:
@@ -1469,8 +1662,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__16
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU1-NEXT: ret void
;
@@ -1486,8 +1677,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1496,12 +1685,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1509,8 +1700,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1519,12 +1708,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1532,9 +1723,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; AMDGPU1-SAME: () #[[ATTR6]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1542,9 +1734,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; AMDGPU1-SAME: () #[[ATTR1]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU1-NEXT: ret void
;
;
@@ -1552,8 +1745,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19
; AMDGPU1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU1-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU1-NEXT: ret void
;
@@ -1562,12 +1753,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; AMDGPU1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU1-NEXT: entry:
-; AMDGPU1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU1-NEXT: ret void
;
;
@@ -1575,14 +1768,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1593,8 +1788,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX1-NEXT: ret void
@@ -1638,14 +1831,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1656,13 +1851,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1670,8 +1866,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX1-NEXT: ret void
;
@@ -1680,12 +1874,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1693,8 +1889,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1703,12 +1897,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1716,14 +1912,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1734,12 +1932,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX1-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; NVPTX1-NEXT: ret void
;
@@ -1748,9 +1946,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; NVPTX1-SAME: () #[[ATTR6:[0-9]+]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1758,9 +1957,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; NVPTX1-SAME: () #[[ATTR1]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1768,8 +1968,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1778,12 +1976,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1791,9 +1991,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; NVPTX1-SAME: () #[[ATTR6]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1801,9 +2002,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; NVPTX1-SAME: () #[[ATTR1]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1811,14 +2013,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1829,12 +2033,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1842,8 +2047,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1852,12 +2055,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1865,8 +2070,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1875,12 +2078,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1888,14 +2093,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1906,12 +2113,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1919,8 +2127,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1929,12 +2135,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1942,8 +2150,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -1952,12 +2158,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -1965,14 +2173,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -1983,12 +2193,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX1-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -1996,8 +2207,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2006,12 +2215,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -2019,8 +2230,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p1() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2029,12 +2238,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -2042,14 +2253,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -2060,8 +2273,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__15
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX1-NEXT: ret void
@@ -2071,15 +2282,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX1: if.then:
; NVPTX1-NEXT: br label [[RETURN:%.*]]
; NVPTX1: if.end:
-; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -2092,15 +2303,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; NVPTX1-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; NVPTX1-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; NVPTX1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX1-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX1: if.then:
; NVPTX1-NEXT: br label [[RETURN:%.*]]
; NVPTX1: if.end:
-; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; NVPTX1-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -2113,14 +2325,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; NVPTX1-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; NVPTX1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX1: user_code.entry:
; NVPTX1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: call void @__kmpc_target_deinit()
; NVPTX1-NEXT: ret void
; NVPTX1: worker.exit:
@@ -2131,8 +2345,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__16
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX1-NEXT: ret void
;
@@ -2148,8 +2360,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2158,12 +2368,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -2171,8 +2383,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2181,12 +2391,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -2194,9 +2406,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; NVPTX1-SAME: () #[[ATTR6]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -2204,9 +2417,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; NVPTX1-SAME: () #[[ATTR1]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX1-NEXT: ret void
;
;
@@ -2214,8 +2428,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19
; NVPTX1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX1-NEXT: call void @p0() #[[ATTR11]]
; NVPTX1-NEXT: ret void
;
@@ -2224,12 +2436,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX1-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; NVPTX1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX1-NEXT: entry:
-; NVPTX1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX1-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX1-NEXT: ret void
;
;
@@ -2237,14 +2451,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2255,8 +2471,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU2-NEXT: ret void
@@ -2300,14 +2514,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2318,13 +2534,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2332,8 +2549,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU2-NEXT: ret void
;
@@ -2342,12 +2557,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2355,8 +2572,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2365,12 +2580,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2378,14 +2595,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2396,12 +2615,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; AMDGPU2-NEXT: ret void
;
@@ -2410,9 +2629,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; AMDGPU2-SAME: () #[[ATTR6:[0-9]+]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2420,9 +2640,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; AMDGPU2-SAME: () #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2430,8 +2651,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2440,12 +2659,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2453,9 +2674,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; AMDGPU2-SAME: () #[[ATTR6]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2463,9 +2685,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; AMDGPU2-SAME: () #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2473,14 +2696,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2491,12 +2716,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2504,8 +2730,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2514,12 +2738,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2527,8 +2753,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2537,12 +2761,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2550,14 +2776,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2568,12 +2796,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2581,8 +2810,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2591,12 +2818,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2604,8 +2833,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2614,12 +2841,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2627,14 +2856,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2645,12 +2876,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2658,8 +2890,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2668,12 +2898,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2681,8 +2913,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2691,12 +2921,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2704,14 +2936,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2722,8 +2956,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__15
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU2-NEXT: ret void
@@ -2733,15 +2965,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU2: if.then:
; AMDGPU2-NEXT: br label [[RETURN:%.*]]
; AMDGPU2: if.end:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -2754,15 +2986,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; AMDGPU2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; AMDGPU2-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; AMDGPU2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU2: if.then:
; AMDGPU2-NEXT: br label [[RETURN:%.*]]
; AMDGPU2: if.end:
-; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; AMDGPU2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -2775,14 +3008,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; AMDGPU2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; AMDGPU2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU2: user_code.entry:
; AMDGPU2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU2-NEXT: ret void
; AMDGPU2: worker.exit:
@@ -2793,8 +3028,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__16
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU2-NEXT: ret void
;
@@ -2810,8 +3043,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2820,12 +3051,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2833,8 +3066,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2843,12 +3074,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2856,9 +3089,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; AMDGPU2-SAME: () #[[ATTR6]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2866,9 +3100,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; AMDGPU2-SAME: () #[[ATTR1]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU2-NEXT: ret void
;
;
@@ -2876,8 +3111,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19
; AMDGPU2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU2-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU2-NEXT: ret void
;
@@ -2886,12 +3119,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; AMDGPU2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU2-NEXT: entry:
-; AMDGPU2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU2-NEXT: ret void
;
;
@@ -2899,14 +3134,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -2917,8 +3154,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; AMDGPU3-NEXT: ret void
@@ -2962,14 +3197,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -2980,13 +3217,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -2994,8 +3232,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; AMDGPU3-NEXT: ret void
;
@@ -3004,12 +3240,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3017,8 +3255,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3027,12 +3263,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3040,14 +3278,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3058,12 +3298,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; AMDGPU3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; AMDGPU3-NEXT: ret void
;
@@ -3072,9 +3312,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; AMDGPU3-SAME: () #[[ATTR6:[0-9]+]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3082,9 +3323,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; AMDGPU3-SAME: () #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3092,8 +3334,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3102,12 +3342,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3115,9 +3357,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; AMDGPU3-SAME: () #[[ATTR6]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3125,9 +3368,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; AMDGPU3-SAME: () #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3135,14 +3379,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3153,12 +3399,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3166,8 +3413,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3176,12 +3421,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3189,8 +3436,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3199,12 +3444,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3212,14 +3459,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3230,12 +3479,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3243,8 +3493,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3253,12 +3501,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3266,8 +3516,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3276,12 +3524,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3289,14 +3539,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3307,12 +3559,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__12
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; AMDGPU3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3320,8 +3573,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3330,12 +3581,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3343,8 +3596,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p1() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3353,12 +3604,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3366,14 +3619,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3384,8 +3639,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__15
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; AMDGPU3-NEXT: ret void
@@ -3395,15 +3648,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU3: if.then:
; AMDGPU3-NEXT: br label [[RETURN:%.*]]
; AMDGPU3: if.end:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
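
; A minimal standalone sketch (illustrative, not part of the patch) of the
; pattern the .internalized checks above expect: when the private pointer
; does not escape, the alloca is created in addrspace(5) and every access
; goes through the addrspace(5) pointer directly, with no generic cast.
; The function and value names here are assumptions for illustration.
define void @direct_private_access(i32 %a) {
entry:
  %a.addr = alloca i32, align 4, addrspace(5)
  store i32 %a, ptr addrspace(5) %a.addr, align 4
  %v = load i32, ptr addrspace(5) %a.addr, align 4
  ret void
}
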
@@ -3416,15 +3669,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; AMDGPU3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; AMDGPU3-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; AMDGPU3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; AMDGPU3: if.then:
; AMDGPU3-NEXT: br label [[RETURN:%.*]]
; AMDGPU3: if.end:
-; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; AMDGPU3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; AMDGPU3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; AMDGPU3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
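
; A minimal standalone sketch (illustrative, not part of the patch) of the
; contrasting pattern in the non-internalized checks above: the
; addrspace(5) alloca is addrspacecast to a generic pointer once, and
; every subsequent store and load goes through the generic pointer.
; Names are assumptions for illustration.
define void @generic_private_access(i32 %a) {
entry:
  %a.addr = alloca i32, align 4, addrspace(5)
  %a.addr.cast = addrspacecast ptr addrspace(5) %a.addr to ptr
  store i32 %a, ptr %a.addr.cast, align 4
  %v = load i32, ptr %a.addr.cast, align 4
  ret void
}
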
@@ -3437,14 +3691,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; AMDGPU3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; AMDGPU3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; AMDGPU3: user_code.entry:
; AMDGPU3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: call void @__kmpc_target_deinit()
; AMDGPU3-NEXT: ret void
; AMDGPU3: worker.exit:
@@ -3455,8 +3711,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__16
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; AMDGPU3-NEXT: ret void
;
@@ -3472,8 +3726,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3482,12 +3734,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3495,8 +3749,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3505,12 +3757,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
@@ -3518,9 +3772,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; AMDGPU3-SAME: () #[[ATTR6]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3528,9 +3783,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; AMDGPU3-SAME: () #[[ATTR1]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU3-NEXT: ret void
;
;
@@ -3538,8 +3794,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19
; AMDGPU3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; AMDGPU3-NEXT: call void @p0() #[[ATTR11]]
; AMDGPU3-NEXT: ret void
;
@@ -3548,12 +3802,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; AMDGPU3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; AMDGPU3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; AMDGPU3-NEXT: entry:
-; AMDGPU3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; AMDGPU3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; AMDGPU3-NEXT: ret void
;
;
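
; A minimal standalone sketch (illustrative, not part of the patch) of the
; escaping pattern that dominates the wrapper and __kmpc_parallel_51
; checks in this file: the alloca lives in addrspace(5), is addrspacecast
; to a generic pointer, and only the generic pointer is handed to the
; callee. @use and all value names are assumptions for illustration.
declare void @use(ptr)

define void @escaping_private_alloca() {
entry:
  %captured = alloca ptr, align 8, addrspace(5)
  %captured.cast = addrspacecast ptr addrspace(5) %captured to ptr
  call void @use(ptr %captured.cast)
  ret void
}
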
@@ -3561,14 +3817,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3579,8 +3837,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX2-NEXT: ret void
@@ -3624,14 +3880,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3642,13 +3900,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3656,8 +3915,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX2-NEXT: ret void
;
@@ -3666,12 +3923,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3679,8 +3938,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3689,12 +3946,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3702,14 +3961,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3720,12 +3981,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX2-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; NVPTX2-NEXT: ret void
;
@@ -3734,9 +3995,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; NVPTX2-SAME: () #[[ATTR6:[0-9]+]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3744,9 +4006,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; NVPTX2-SAME: () #[[ATTR1]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3754,8 +4017,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3764,12 +4025,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3777,9 +4040,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; NVPTX2-SAME: () #[[ATTR6]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3787,9 +4051,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; NVPTX2-SAME: () #[[ATTR1]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3797,14 +4062,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3815,12 +4082,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3828,8 +4096,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3838,12 +4104,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3851,8 +4119,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3861,12 +4127,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3874,14 +4142,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3892,12 +4162,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3905,8 +4176,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3915,12 +4184,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3928,8 +4199,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3938,12 +4207,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -3951,14 +4222,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -3969,12 +4242,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX2-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -3982,8 +4256,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -3992,12 +4264,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -4005,8 +4279,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p1() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4015,12 +4287,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -4028,14 +4302,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -4046,8 +4322,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__15
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX2-NEXT: ret void
@@ -4057,15 +4331,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX2: if.then:
; NVPTX2-NEXT: br label [[RETURN:%.*]]
; NVPTX2: if.end:
-; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -4078,15 +4352,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; NVPTX2-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; NVPTX2-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; NVPTX2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX2-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX2-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX2: if.then:
; NVPTX2-NEXT: br label [[RETURN:%.*]]
; NVPTX2: if.end:
-; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; NVPTX2-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -4099,14 +4374,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; NVPTX2-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; NVPTX2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX2: user_code.entry:
; NVPTX2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: call void @__kmpc_target_deinit()
; NVPTX2-NEXT: ret void
; NVPTX2: worker.exit:
@@ -4117,8 +4394,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__16
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX2-NEXT: ret void
;
@@ -4134,8 +4409,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4144,12 +4417,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -4157,8 +4432,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4167,12 +4440,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -4180,9 +4455,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; NVPTX2-SAME: () #[[ATTR6]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -4190,9 +4466,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; NVPTX2-SAME: () #[[ATTR1]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX2-NEXT: ret void
;
;
@@ -4200,8 +4477,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19
; NVPTX2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX2-NEXT: call void @p0() #[[ATTR11]]
; NVPTX2-NEXT: ret void
;
@@ -4210,12 +4485,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX2-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; NVPTX2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX2-NEXT: entry:
-; NVPTX2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX2-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX2-NEXT: ret void
;
;
@@ -4223,14 +4500,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_needed_l14
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_needed_l14_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3:[0-9]+]]
-; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4241,8 +4520,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9:[0-9]+]]
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10:[0-9]+]]
; NVPTX3-NEXT: ret void
@@ -4286,14 +4563,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_l22_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4304,13 +4583,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4318,8 +4598,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11:[0-9]+]]
; NVPTX3-NEXT: ret void
;
@@ -4328,12 +4606,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4341,8 +4621,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4351,12 +4629,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4364,14 +4644,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4382,12 +4664,12 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR9]]
; NVPTX3-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR9]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR9]]
; NVPTX3-NEXT: ret void
;
@@ -4396,9 +4678,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
; NVPTX3-SAME: () #[[ATTR6:[0-9]+]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4406,9 +4689,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
; NVPTX3-SAME: () #[[ATTR1]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__17, ptr @__omp_outlined__17_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4416,8 +4700,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4426,12 +4708,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4439,9 +4723,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
; NVPTX3-SAME: () #[[ATTR6]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4449,9 +4734,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
; NVPTX3-SAME: () #[[ATTR1]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__18, ptr @__omp_outlined__18_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4459,14 +4745,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4477,12 +4765,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR11]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4490,8 +4779,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4500,12 +4787,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4513,8 +4802,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4523,12 +4810,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__8(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4536,14 +4825,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4554,12 +4845,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4567,8 +4859,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4577,12 +4867,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__10(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4590,8 +4882,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4600,12 +4890,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__11(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4613,14 +4905,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_pure_l77_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__12(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4631,12 +4925,13 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__12
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
; NVPTX3-NEXT: call void @unknown_no_openmp() #[[ATTR10]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 undef, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__14, ptr @__omp_outlined__14_wrapper, ptr [[CAPTURED_VARS_ADDRS1_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4644,8 +4939,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4654,12 +4947,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__13(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4667,8 +4962,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p1() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4677,12 +4970,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__14(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4690,14 +4985,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_simple_state_machine_interprocedural_nested_recursive_l92_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__15(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4708,8 +5005,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__15
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR9]]
; NVPTX3-NEXT: ret void
@@ -4719,15 +5014,15 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: store i32 [[A]], ptr addrspace(5) [[A_ADDR]], align 4
+; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX3: if.then:
; NVPTX3-NEXT: br label [[RETURN:%.*]]
; NVPTX3: if.end:
-; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR9]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR9]]
@@ -4740,15 +5035,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
; NVPTX3-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[A_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+; NVPTX3-NEXT: store i32 [[A]], ptr [[A_ADDR_CAST]], align 4
+; NVPTX3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX3-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX3-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; NVPTX3: if.then:
; NVPTX3-NEXT: br label [[RETURN:%.*]]
; NVPTX3: if.end:
-; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; NVPTX3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_CAST]], align 4
; NVPTX3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR11]]
; NVPTX3-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR11]]
@@ -4761,14 +5057,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; NVPTX3-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112_kernel_environment, ptr [[DYN]])
; NVPTX3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; NVPTX3: user_code.entry:
; NVPTX3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: call void @__omp_outlined__16(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: call void @__kmpc_target_deinit()
; NVPTX3-NEXT: ret void
; NVPTX3: worker.exit:
@@ -4779,8 +5077,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__16
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @weak_callee_empty() #[[ATTR9]]
; NVPTX3-NEXT: ret void
;
@@ -4796,8 +5092,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4806,12 +5100,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__17(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4819,8 +5115,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4829,12 +5123,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__18(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;
@@ -4842,9 +5138,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
; NVPTX3-SAME: () #[[ATTR6]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4852,9 +5149,10 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
; NVPTX3-SAME: () #[[ATTR1]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
-; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__19, ptr @__omp_outlined__19_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX3-NEXT: ret void
;
;
@@ -4862,8 +5160,6 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19
; NVPTX3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
; NVPTX3-NEXT: call void @p0() #[[ATTR11]]
; NVPTX3-NEXT: ret void
;
@@ -4872,12 +5168,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
; NVPTX3-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
; NVPTX3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
; NVPTX3-NEXT: entry:
-; NVPTX3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; NVPTX3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX3-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX3-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX3-NEXT: call void @__omp_outlined__19(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR3]]
; NVPTX3-NEXT: ret void
;
;.
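
(For readers skimming the regenerated checks: every hunk in these test updates applies the same mechanical rewrite, so a minimal standalone sketch of the pattern may help. This sketch is illustrative only and not part of the patch; the function name is hypothetical, and it assumes an AMDGPU module whose data layout places allocas in addrspace(5), which is what the new verifier check enforces.)

; Sketch of the rewrite the updated tests check for: the stack slot is created
; in the private address space (5), then immediately addrspacecast to a generic
; pointer so existing generic-pointer users keep working unchanged.
define void @alloca_addrspace5_sketch() {
entry:
  %slot = alloca i32, align 4, addrspace(5)
  %slot.cast = addrspacecast ptr addrspace(5) %slot to ptr
  store i32 0, ptr %slot.cast, align 4
  ret void
}
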
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 1a629ecfee06d..e91f1608f066f 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -89,12 +89,10 @@
;; }
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
+%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
+%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.kmp_task_t = type { ptr, ptr, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { ptr }
-%struct.anon = type {}
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
-%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@@ -105,6 +103,7 @@
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
+; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
@@ -225,8 +224,10 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -234,16 +235,18 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -251,8 +254,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -260,8 +263,10 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -302,16 +307,18 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -319,8 +326,8 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -328,8 +335,10 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -369,16 +378,18 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED2-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -386,36 +397,38 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_l5_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr) #6
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void @__omp_outlined__(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -425,15 +438,17 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -443,15 +458,17 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -461,15 +478,17 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -479,15 +498,17 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -497,15 +518,17 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -515,13 +538,15 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
br label %for.cond
for.cond: ; preds = %for.body, %entry
@@ -530,17 +555,16 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #6
ret void
for.body: ; preds = %for.cond
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !22
+ br label %for.cond, !llvm.loop !16
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -579,80 +603,101 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
+define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__1(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
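
The regenerated wrapper checks above all encode the same idiom: each private stack slot is created directly in the alloca address space and a flat pointer is materialized only where one is required, e.g. for the runtime calls. A minimal standalone sketch of that idiom follows; the callee @take_flat_ptr is a placeholder for illustration, not part of this patch.

; Hypothetical example, not from the patch: private alloca plus explicit
; flat cast, matching the pattern in the updated checks above.
declare void @take_flat_ptr(ptr)

define void @private_alloca_sketch() {
entry:
  %slot = alloca i32, align 4, addrspace(5)
  ; Direct accesses can use the addrspace(5) pointer without a cast.
  store i32 0, ptr addrspace(5) %slot, align 4
  ; A flat (generic) pointer is produced only at the call boundary.
  %slot.cast = addrspacecast ptr addrspace(5) %slot to ptr
  call void @take_flat_ptr(ptr %slot.cast)
  ret void
}
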
@@ -661,8 +706,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -670,16 +717,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -687,8 +736,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -696,8 +745,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -738,16 +789,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -755,8 +808,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -764,8 +817,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -805,16 +860,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -822,37 +879,39 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void @__omp_outlined__2(ptr %.threadid_temp., ptr %.zero.addr) #6
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void @__omp_outlined__2(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
@@ -864,8 +923,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -873,7 +933,8 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
@@ -884,8 +945,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -893,7 +955,8 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
@@ -905,8 +968,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -914,7 +978,8 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
; AMDGPU-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
@@ -926,8 +991,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -935,7 +1001,8 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
@@ -946,8 +1013,9 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -955,7 +1023,8 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
@@ -966,15 +1035,17 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
- call void @use(ptr nocapture %x) #10
+ call void @use(ptr captures(none) %x) #6
br label %for.cond
for.cond: ; preds = %for.body, %entry
@@ -983,17 +1054,17 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !25
+ br label %for.cond, !llvm.loop !19
}
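
Note that the checks for @__omp_outlined__2 above also show the rewrite running in the opposite direction: when a flat pointer argument is known to address a private slot, the access is cast back into addrspace(5) and the load goes through that pointer instead of the flat one. A reduced sketch of that access pattern; the function name is illustrative and the assumption on the argument is stated in the comment.

define i32 @reload_private_sketch(ptr %flat.arg) {
entry:
  ; Assumes %flat.arg is a flat view of a private (addrspace(5)) object,
  ; as with the %.global_tid. argument in the checks above.
  %priv = addrspacecast ptr %flat.arg to ptr addrspace(5)
  %val = load i32, ptr addrspace(5) %priv, align 4
  ret i32 %val
}
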
-; Function Attrs: alwaysinline convergent norecurse nounwind
+
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -1032,91 +1103,113 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
+define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__3(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
-
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1124,16 +1217,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1141,8 +1236,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -1150,8 +1245,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1192,16 +1289,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1209,8 +1308,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -1218,8 +1317,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1259,16 +1360,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1276,36 +1379,38 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void @__omp_outlined__4(ptr %.threadid_temp., ptr %.zero.addr) #6
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void @__omp_outlined__4(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
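
The checks for @__omp_outlined__4 below add one more flavor of the same rewrite: the pointer published through the captured-variables buffer is a constant addrspacecast of an LDS (addrspace(3)) global, while the buffer itself is a private addrspace(5) slot. A reduced sketch, with @g_shared and its type chosen purely for illustration:

@g_shared = internal addrspace(3) global i32 undef, align 4

define void @capture_lds_sketch() {
entry:
  %buf = alloca ptr, align 8, addrspace(5)
  ; The LDS pointer is stored as a flat pointer via a constant cast,
  ; while the store itself targets the private buffer directly.
  store ptr addrspacecast (ptr addrspace(3) @g_shared to ptr), ptr addrspace(5) %buf, align 8
  ret void
}
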
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1315,16 +1420,18 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1334,16 +1441,18 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
; AMDGPU-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1353,16 +1462,18 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
; AMDGPU-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1372,16 +1483,18 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1391,16 +1504,18 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1410,14 +1525,16 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [1 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
br label %for.cond
@@ -1427,19 +1544,18 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
- store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
+ store ptr %x, ptr %captured_vars_addrs.cast, align 8, !tbaa !20
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs.cast, i64 1)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !28
+ br label %for.cond, !llvm.loop !22
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
@@ -1496,97 +1612,118 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %0 = load i32, ptr %x, align 4, !tbaa !18
+ %0 = load i32, ptr %x, align 4, !tbaa !12
%inc = add nsw i32 %0, 1
- store i32 %inc, ptr %x, align 4, !tbaa !18
- call void @unknown() #11
+ store i32 %inc, ptr %x, align 4, !tbaa !12
+ call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
+define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- %2 = load ptr, ptr %global_args, align 8
- %3 = load ptr, ptr %2, align 8, !tbaa !26
- call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ %2 = load ptr, ptr %global_args.cast, align 8
+ %3 = load ptr, ptr %2, align 8, !tbaa !20
+ call void @__omp_outlined__5(ptr %.addr1.cast, ptr %.zero.addr.cast, ptr %3) #3
ret void
}
@@ -1595,8 +1732,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1604,16 +1743,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1621,8 +1762,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -1630,8 +1771,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1672,16 +1815,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1689,8 +1834,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -1698,8 +1843,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -1739,16 +1886,18 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -1756,36 +1905,38 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void @__omp_outlined__6(ptr %.threadid_temp., ptr %.zero.addr) #6
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void @__omp_outlined__6(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
; AMDGPU: region.check.tid:
; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1809,16 +1960,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
; NVPTX: region.check.tid:
; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1842,16 +1995,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED1: for.cond:
@@ -1862,16 +2017,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: for.body:
-; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), align 4, !tbaa [[TBAA12]]
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED2: for.cond:
@@ -1882,16 +2039,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; AMDGPU-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: for.body:
-; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
@@ -1902,16 +2061,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: for.body:
-; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED1-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED1-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), align 4, !tbaa [[TBAA12]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
@@ -1922,16 +2083,18 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: for.body:
-; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-DISABLED2-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20]]
+; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[DOTGLOBAL_TID_]] to ptr addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[TMP0]], align 4, !tbaa [[TBAA12]]
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-DISABLED2-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [1 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
- store i32 42, ptr %x, align 4, !tbaa !18
+ store i32 42, ptr %x, align 4, !tbaa !12
br label %for.cond
for.cond: ; preds = %for.body, %entry
@@ -1940,19 +2103,18 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #6
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
- store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs, i64 1)
+ store ptr %x, ptr %captured_vars_addrs.cast, align 8, !tbaa !20
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr %captured_vars_addrs.cast, i64 1)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !29
+ br label %for.cond, !llvm.loop !23
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
@@ -2009,97 +2171,118 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %0 = load i32, ptr %x, align 4, !tbaa !18
+ %0 = load i32, ptr %x, align 4, !tbaa !12
%inc = add nsw i32 %0, 1
- store i32 %inc, ptr %x, align 4, !tbaa !18
- call void @unknowni32p(ptr %x) #11
+ store i32 %inc, ptr %x, align 4, !tbaa !12
+ call void @unknowni32p(ptr %x) #7
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
+define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- %2 = load ptr, ptr %global_args, align 8
- %3 = load ptr, ptr %2, align 8, !tbaa !26
- call void @__omp_outlined__7(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ %2 = load ptr, ptr %global_args.cast, align 8
+ %3 = load ptr, ptr %2, align 8, !tbaa !20
+ call void @__omp_outlined__7(ptr %.addr1.cast, ptr %.zero.addr.cast, ptr %3) #3
ret void
}
@@ -2109,8 +2292,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2147,7 +2332,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
@@ -2155,8 +2340,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2192,7 +2379,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -2200,8 +2387,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2238,15 +2427,17 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED1-NEXT: ret void
; AMDGPU-DISABLED1: user_code.entry:
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -2254,7 +2445,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; AMDGPU-DISABLED2-NEXT: ret void
; AMDGPU-DISABLED2: user_code.entry:
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -2262,8 +2453,10 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2299,15 +2492,17 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-NEXT: ret void
; NVPTX-DISABLED1: user_code.entry:
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -2315,30 +2510,31 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED2-NEXT: ret void
; NVPTX-DISABLED2: user_code.entry:
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void @__omp_outlined__8(ptr %.threadid_temp., ptr %.zero.addr) #6
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void @__omp_outlined__8(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #3
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -2377,7 +2573,7 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #7
ret void
}
@@ -2387,7 +2583,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2432,7 +2629,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
@@ -2440,7 +2637,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2484,7 +2682,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
@@ -2492,7 +2690,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; AMDGPU-DISABLED1-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED1-NEXT: entry:
; AMDGPU-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2537,14 +2736,15 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; AMDGPU-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-DISABLED2-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; AMDGPU-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -2554,7 +2754,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; AMDGPU-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-DISABLED2-NEXT: br label [[COMMON_RET]]
;
@@ -2562,7 +2762,8 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED1-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -2606,14 +2807,15 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-DISABLED1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED1-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED1-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED2-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
; NVPTX-DISABLED2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -2623,30 +2825,31 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-DISABLED2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]]
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-DISABLED2-NEXT: call void @__kmpc_target_deinit()
; NVPTX-DISABLED2-NEXT: br label [[COMMON_RET]]
;
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
%2 = call ptr @__kmpc_omp_task_alloc(ptr @1, i32 %1, i32 1, i64 40, i64 0, ptr @"_omp_task_entry$")
%3 = call i32 @__kmpc_omp_task(ptr @1, i32 %1, ptr %2)
- call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs, i64 0)
+ call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper, ptr %captured_vars_addrs.cast, i64 0)
call void @__kmpc_target_deinit()
br label %common.ret
}
; Function Attrs: alwaysinline convergent nounwind
-define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 {
+define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #2 {
; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-NEXT: entry:
@@ -2684,43 +2887,44 @@ define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id.,
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #6
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #3 {
+define internal i32 @"_omp_task_entry$"(i32 %0, ptr noalias %1) #1 {
entry:
%2 = getelementptr inbounds %struct.kmp_task_t, ptr %1, i32 0, i32 2
- %3 = load ptr, ptr %1, align 8, !tbaa !30
- call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #6
+ %3 = load ptr, ptr %1, align 8, !tbaa !24
+ call void @.omp_outlined.(i32 %0, ptr %2, ptr null, ptr null, ptr %1, ptr %3) #3
ret i32 0
}
; Function Attrs: nounwind
-declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #6
+declare ptr @__kmpc_omp_task_alloc(ptr, i32, i32, i64, i64, ptr) #3
; Function Attrs: nounwind
-declare i32 @__kmpc_omp_task(ptr, i32, ptr) #6
+declare i32 @__kmpc_omp_task(ptr, i32, ptr) #3
; Function Attrs: nosync nounwind
-declare void @__kmpc_free_shared(ptr nocapture, i64) #8
+declare void @__kmpc_free_shared(ptr captures(none), i64) #4
; Function Attrs: nofree nosync nounwind
-declare ptr @__kmpc_alloc_shared(i64) #7
+declare ptr @__kmpc_alloc_shared(i64) #5
+
+; Function Attrs: convergent
+declare void @use(ptr captures(none)) #6
; Function Attrs: convergent
-declare void @use(ptr nocapture) #5
+declare void @unknown() #7
; Function Attrs: convergent
-declare void @unknown() #2
-declare void @unknowni32p(ptr) #2
+declare void @unknowni32p(ptr) #7
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #8
-; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr, ptr) {
+define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-NEXT: ret i32 0
@@ -2751,21 +2955,19 @@ define weak i32 @__kmpc_target_init(ptr, ptr) {
declare void @__kmpc_get_shared_variables(ptr)
; Function Attrs: alwaysinline
-declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #9
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #8
; Function Attrs: convergent
-declare void @spmd_amenable() #5
+declare void @spmd_amenable() #6
; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(ptr) #6
+declare i32 @__kmpc_global_thread_num(ptr) #3
declare void @__kmpc_target_deinit()
-
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -2804,101 +3006,120 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #7
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
+define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED1-NEXT: entry:
-; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED1-NEXT: ret void
;
; AMDGPU-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED2-NEXT: entry:
-; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; AMDGPU-DISABLED2-NEXT: ret void
;
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED1-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED1-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED1-NEXT: ret void
;
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-DISABLED2-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-DISABLED2-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR4]]
; NVPTX-DISABLED2-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__9(ptr %.addr1.cast, ptr %.zero.addr.cast) #3
ret void
}
-declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();
+declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" }
-attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
-attributes #2 = { convergent }
-attributes #3 = { convergent norecurse nounwind }
-attributes #4 = { alwaysinline }
-attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
-attributes #6 = { nounwind }
-attributes #7 = { nofree nosync nounwind }
-attributes #8 = { nosync nounwind }
-attributes #9 = { alwaysinline convergent nounwind }
-attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
-attributes #11 = { convergent }
+attributes #1 = { convergent norecurse nounwind }
+attributes #2 = { alwaysinline convergent nounwind }
+attributes #3 = { nounwind }
+attributes #4 = { nosync nounwind }
+attributes #5 = { nofree nosync nounwind }
+attributes #6 = { convergent "llvm.assume"="ompx_spmd_amenable" }
+attributes #7 = { convergent }
+attributes #8 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #9 = { alwaysinline }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
-!llvm.module.flags = !{!12, !13, !14, !15, !16}
-!llvm.ident = !{!17}
+!llvm.module.flags = !{!6, !7, !8, !9, !10}
+!llvm.ident = !{!11}
!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -2906,27 +3127,27 @@ attributes #11 = { convergent }
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-!12 = !{i32 1, !"wchar_size", i32 4}
-!13 = !{i32 7, !"openmp", i32 50}
-!14 = !{i32 7, !"openmp-device", i32 50}
-!15 = !{i32 8, !"PIC Level", i32 2}
-!16 = !{i32 7, !"frame-pointer", i32 2}
-!17 = !{!"clang version 14.0.0"}
-!18 = !{!19, !19, i64 0}
-!19 = !{!"int", !20, i64 0}
-!20 = !{!"omnipotent char", !21, i64 0}
-!21 = !{!"Simple C/C++ TBAA"}
-!22 = distinct !{!22, !23, !24}
-!23 = !{!"llvm.loop.mustprogress"}
-!24 = !{!"llvm.loop.unroll.disable"}
-!25 = distinct !{!25, !23, !24}
-!26 = !{!27, !27, i64 0}
-!27 = !{!"any pointer", !20, i64 0}
-!28 = distinct !{!28, !23, !24}
-!29 = distinct !{!29, !23, !24}
-!30 = !{!31, !27, i64 0}
-!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
-!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
+!6 = !{i32 1, !"wchar_size", i32 4}
+!7 = !{i32 7, !"openmp", i32 50}
+!8 = !{i32 7, !"openmp-device", i32 50}
+!9 = !{i32 8, !"PIC Level", i32 2}
+!10 = !{i32 7, !"frame-pointer", i32 2}
+!11 = !{!"clang version 14.0.0"}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"int", !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C/C++ TBAA"}
+!16 = distinct !{!16, !17, !18}
+!17 = !{!"llvm.loop.mustprogress"}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = distinct !{!19, !17, !18}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"any pointer", !14, i64 0}
+!22 = distinct !{!22, !17, !18}
+!23 = distinct !{!23, !17, !18}
+!24 = !{!25, !21, i64 0}
+!25 = !{!"kmp_task_t_with_privates", !26, i64 0}
+!26 = !{!"kmp_task_t", !21, i64 0, !21, i64 8, !13, i64 16, !14, i64 24, !14, i64 32}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
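
(For reviewers skimming the regenerated checks above: the rewrite is mechanical. Every generic-address-space alloca becomes an addrspace(5) alloca whose result is immediately addrspacecast back to the generic address space, so all downstream uses keep their `ptr` type. A minimal hand-written sketch of the pattern; the function name is hypothetical and this is illustrative only, not code from the patch:

target datalayout = "A5"
target triple = "amdgcn-amd-amdhsa"

define void @sketch() {
entry:
  ; before: %x = alloca i32, align 4   (generic AS; no longer verifies on amdgcn)
  ; after: allocate in the datalayout's alloca address space, then cast back
  %x = alloca i32, align 4, addrspace(5)
  %x.cast = addrspacecast ptr addrspace(5) %x to ptr
  store i32 0, ptr %x.cast, align 4
  ret void
}

The cast-back is what keeps existing `ptr`-typed users unchanged, which is why the regenerated checks mostly just grow the extra addrspacecast lines.)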
diff --git a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
index 953ecb2ddd8a6..63b54bfddf9de 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll
@@ -8,7 +8,7 @@
; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode
; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode
;
-target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target datalayout = "A5-G1-ni:7:8:9-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
target triple = "amdgcn-amd-amdhsa"
%struct.ident_t = type { i32, i32, i32, i32, ptr }
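
(Side note on the shortened datalayout in this hunk: it keeps only the components the test depends on. In particular `A5` sets the alloca address space, which is what the verifier compares each alloca against, so without it the expected address space would default to 0 and every addrspace(5) alloca below would mismatch. A tiny stand-alone illustration, assuming nothing beyond the string itself:

target datalayout = "A5"
target triple = "amdgcn-amd-amdhsa"

define void @match_a5() {
  ; with A5 present, a generic alloca such as
  ;   %bad = alloca i32, align 4
  ; would no longer verify on amdgcn; this one matches A5:
  %ok = alloca i32, align 4, addrspace(5)
  ret void
}
)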
@@ -42,7 +42,8 @@ target triple = "amdgcn-amd-amdhsa"
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak_odr amdgpu_kernel void @__omp_offloading_20_11e3950_main_l12(ptr %dyn, i64 noundef %nxyz, i64 noundef %ng, ptr noundef nonnull align 8 dereferenceable(8) %aa) local_unnamed_addr #0 {
entry:
- %ng1 = alloca i32, align 4
+ %ng1 = alloca ptr, align 8, addrspace(5)
+ %ng1.cast = addrspacecast ptr addrspace(5) %ng1 to ptr
%captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5)
%0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_kernel_environment to ptr), ptr %dyn)
%exec_user_code = icmp eq i32 %0, -1
@@ -50,7 +51,7 @@ entry:
user_code.entry: ; preds = %entry
%captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
- store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7
+ store ptr %ng1.cast, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !6
call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2)
call void @__kmpc_target_deinit()
br label %common.ret
@@ -59,190 +60,189 @@ common.ret: ; preds = %user_code.entry, %e
ret void
}
-; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) captures(none)) #1
-; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn
-define internal void @__omp_outlined__(ptr noalias nocapture %.global_tid., ptr noalias nocapture %.bound_tid., ptr nocapture nonnull align 4 %ng, ptr nocapture nonnull align 8 %aa) #2 {
+; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define internal void @__omp_outlined__(ptr noalias captures(none) %.global_tid., ptr noalias captures(none) %.bound_tid., ptr nonnull align 4 captures(none) %ng, ptr nonnull align 8 captures(none) %aa) #2 {
entry:
- %isspmd = load i32, ptr addrspace(3) @IsSPMDMode
- store i32 %isspmd, ptr @G
+ %isspmd = load i32, ptr addrspace(3) @IsSPMDMode, align 4
+ store i32 %isspmd, ptr @G, align 4
ret void
}
-; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) captures(none)) #1
-; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 noundef %1) #3 {
entry:
- %isspmd = load i32, ptr addrspace(3) @IsSPMDMode
- store i32 %isspmd, ptr @G
+ %isspmd = load i32, ptr addrspace(3) @IsSPMDMode, align 4
+ store i32 %isspmd, ptr @G, align 4
ret void
}
-; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workitem.id.x() #4
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.amdgcn.workitem.id.x() #4
-; Function Attrs: nounwind readnone speculatable willreturn
-declare i32 @llvm.amdgcn.workgroup.id.x() #4
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef i32 @llvm.amdgcn.workgroup.id.x() #4
-; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #4
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare noundef nonnull align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #4
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.umin.i32(i32, i32) #5
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.umin.i32(i32, i32) #4
-; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn
-declare void @llvm.assume(i1 noundef) #6
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #5
-; Function Attrs: convergent nounwind willreturn
-declare void @llvm.amdgcn.s.barrier() #7
+; Function Attrs: convergent nocallback nofree nounwind willreturn
+declare void @llvm.amdgcn.s.barrier() #6
; Function Attrs: convergent mustprogress noinline nounwind willreturn
-define internal fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() unnamed_addr #8 {
+define internal fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() unnamed_addr #7 {
entry:
- call void @llvm.amdgcn.s.barrier() #13
+ call void @llvm.amdgcn.s.barrier() #11
ret void
}
; Function Attrs: convergent nounwind
-; define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 {
-define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment, ptr %dyn) local_unnamed_addr #9 {
+define internal i32 @__kmpc_target_init(ptr nofree noundef nonnull align 8 dereferenceable(24) %KernelEnvironment, ptr %dyn) local_unnamed_addr #8 {
entry:
%0 = and i32 undef, undef
%ExecMode = getelementptr inbounds %struct.ConfigurationEnvironmentTy, ptr %KernelEnvironment, i64 0, i32 2
- %Mode = load i8, ptr %ExecMode, align 2, !tbaa !28
+ %Mode = load i8, ptr %ExecMode, align 2, !tbaa !10
%1 = and i8 %Mode, 2
%tobool.not = icmp eq i8 %1, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then: ; preds = %entry
- %2 = call i32 @llvm.amdgcn.workitem.id.x() #13, !range !11
- %3 = call i32 @llvm.amdgcn.workgroup.id.x() #13
- %4 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
+ %2 = call i32 @llvm.amdgcn.workitem.id.x() #11, !range !17
+ %3 = call i32 @llvm.amdgcn.workgroup.id.x() #11
+ %4 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #11
%5 = getelementptr i8, ptr addrspace(4) %4, i64 12
- %6 = load i32, ptr addrspace(4) %5, align 4, !invariant.load !12
+ %6 = load i32, ptr addrspace(4) %5, align 4, !invariant.load !18
%7 = getelementptr i8, ptr addrspace(4) %4, i64 4
- %8 = load i16, ptr addrspace(4) %7, align 4, !range !13, !invariant.load !12
+ %8 = load i16, ptr addrspace(4) %7, align 4, !range !19, !invariant.load !18
%conv.i.i7.i.i.i = zext i16 %8 to i32
%mul.i.i8.i.i.i = mul i32 %3, %conv.i.i7.i.i.i
%sub.i.i9.i.i.i = sub i32 %6, %mul.i.i8.i.i.i
- %9 = call i32 @llvm.umin.i32(i32 %sub.i.i9.i.i.i, i32 %conv.i.i7.i.i.i) #13
+ %9 = call i32 @llvm.umin.i32(i32 %sub.i.i9.i.i.i, i32 %conv.i.i7.i.i.i) #11
%cmp4.i.i.i = icmp ult i32 %2, %9
- call void @llvm.assume(i1 %cmp4.i.i.i) #13
+ call void @llvm.assume(i1 %cmp4.i.i.i) #11
%cmp.i.i8 = icmp eq i32 %2, 0
br i1 %cmp.i.i8, label %if.then.i, label %_ZN4ompx5state4initEb.exit.critedge
if.then.i: ; preds = %if.then
- store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
- store i8 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), align 16, !tbaa !18
- store i32 %9, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !19
+ store i32 1, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !20
+ store i8 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), align 16, !tbaa !21
+ store i32 %9, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !22
store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4, !tbaa !23
store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8, !tbaa !24
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4, !tbaa !25
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8, !tbaa !26
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4, !tbaa !27
- store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !28
- store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !29
+ store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !10
+ store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !28
br label %_ZN4ompx5state4initEb.exit
_ZN4ompx5state4initEb.exit.critedge: ; preds = %if.then
%arrayidx.i.i.c = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %2
- store i8 0, ptr addrspace(3) %arrayidx.i.i.c, align 1, !tbaa !18
+ store i8 0, ptr addrspace(3) %arrayidx.i.i.c, align 1, !tbaa !21
br label %_ZN4ompx5state4initEb.exit
_ZN4ompx5state4initEb.exit: ; preds = %_ZN4ompx5state4initEb.exit.critedge, %if.then.i
%arrayidx.i = getelementptr inbounds [1024 x ptr], ptr addrspace(3) @_ZN12_GLOBAL__N_112ThreadStatesE, i32 0, i32 %2
- store ptr null, ptr addrspace(3) %arrayidx.i, align 8, !tbaa !30
- call fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() #14
+ store ptr null, ptr addrspace(3) %arrayidx.i, align 8, !tbaa !29
+ call fastcc void @_ZN4ompx11synchronize14threadsAlignedEv() #12
br label %if.end
if.else: ; preds = %entry
- %10 = call i32 @llvm.amdgcn.workgroup.id.x() #13
- %11 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
+ %10 = call i32 @llvm.amdgcn.workgroup.id.x() #11
+ %11 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #11
%12 = getelementptr i8, ptr addrspace(4) %11, i64 12
- %13 = load i32, ptr addrspace(4) %12, align 4, !invariant.load !12
+ %13 = load i32, ptr addrspace(4) %12, align 4, !invariant.load !18
%14 = getelementptr i8, ptr addrspace(4) %11, i64 4
- %15 = load i16, ptr addrspace(4) %14, align 4, !range !13, !invariant.load !12
+ %15 = load i16, ptr addrspace(4) %14, align 4, !range !19, !invariant.load !18
%conv.i.i.i.i.i.i = zext i16 %15 to i32
%mul.i.i.i.i.i.i = mul i32 %10, %conv.i.i.i.i.i.i
%sub.i.i.i.i.i.i = sub i32 %13, %mul.i.i.i.i.i.i
- %16 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i.i, i32 %conv.i.i.i.i.i.i) #13
- %17 = call i32 @llvm.amdgcn.workitem.id.x() #13
+ %16 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i.i, i32 %conv.i.i.i.i.i.i) #11
+ %17 = call i32 @llvm.amdgcn.workitem.id.x() #11
%cmp.i.i.i.i26 = icmp ult i32 %17, %16
- call void @llvm.assume(i1 %cmp.i.i.i.i26) #13
+ call void @llvm.assume(i1 %cmp.i.i.i.i26) #11
%sub.i.i.i27 = add nsw i32 %16, -1
%and.i.i.i28 = and i32 %sub.i.i.i27, -64
%cmp.i2.i.i29 = icmp eq i32 %17, %and.i.i.i28
br i1 %cmp.i2.i.i29, label %if.then.i30, label %_ZN4ompx5state4initEb.exit55.critedge
if.then.i30: ; preds = %if.else
- store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
+ store i32 0, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !20
%arrayidx.i.i46 = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %17
- store i8 0, ptr addrspace(3) %arrayidx.i.i46, align 1, !tbaa !18
+ store i8 0, ptr addrspace(3) %arrayidx.i.i46, align 1, !tbaa !21
%sub.i.i = add nsw i32 %16, -64
- store i32 %sub.i.i, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !19
+ store i32 %sub.i.i, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8, !tbaa !22
store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4, !tbaa !23
store i32 0, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8, !tbaa !24
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4, !tbaa !25
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8, !tbaa !26
store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4, !tbaa !27
- store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !28
- store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !29
+ store i32 1, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8, !tbaa !10
+ store ptr null, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 2), align 8, !tbaa !28
br label %_ZN4ompx5state4initEb.exit55
_ZN4ompx5state4initEb.exit55.critedge: ; preds = %if.else
%arrayidx.i.i46.c = getelementptr inbounds [1024 x i8], ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::SharedMemorySmartStackTy", ptr addrspace(3) @_ZN12_GLOBAL__N_122SharedMemorySmartStackE, i32 0, i32 1, i32 0), i32 0, i32 %17
- store i8 0, ptr addrspace(3) %arrayidx.i.i46.c, align 1, !tbaa !18
+ store i8 0, ptr addrspace(3) %arrayidx.i.i46.c, align 1, !tbaa !21
br label %_ZN4ompx5state4initEb.exit55
_ZN4ompx5state4initEb.exit55: ; preds = %_ZN4ompx5state4initEb.exit55.critedge, %if.then.i30
%arrayidx.i53 = getelementptr inbounds [1024 x ptr], ptr addrspace(3) @_ZN12_GLOBAL__N_112ThreadStatesE, i32 0, i32 %17
- store ptr null, ptr addrspace(3) %arrayidx.i53, align 8, !tbaa !30
+ store ptr null, ptr addrspace(3) %arrayidx.i53, align 8, !tbaa !29
br label %if.end
if.end: ; preds = %_ZN4ompx5state4initEb.exit55, %_ZN4ompx5state4initEb.exit
- %18 = call i32 @llvm.amdgcn.workgroup.id.x() #13
- %19 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #13
+ %18 = call i32 @llvm.amdgcn.workgroup.id.x() #11
+ %19 = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #11
%20 = getelementptr i8, ptr addrspace(4) %19, i64 12
- %21 = load i32, ptr addrspace(4) %20, align 4, !invariant.load !12
+ %21 = load i32, ptr addrspace(4) %20, align 4, !invariant.load !18
%22 = getelementptr i8, ptr addrspace(4) %19, i64 4
- %23 = load i16, ptr addrspace(4) %22, align 4, !range !13, !invariant.load !12
+ %23 = load i16, ptr addrspace(4) %22, align 4, !range !19, !invariant.load !18
%conv.i.i.i.i.i73 = zext i16 %23 to i32
%mul.i.i.i.i.i74 = mul i32 %18, %conv.i.i.i.i.i73
%sub.i.i.i.i.i75 = sub i32 %21, %mul.i.i.i.i.i74
- %24 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i75, i32 %conv.i.i.i.i.i73) #13
- %25 = call i32 @llvm.amdgcn.workitem.id.x() #13
+ %24 = call i32 @llvm.umin.i32(i32 %sub.i.i.i.i.i75, i32 %conv.i.i.i.i.i73) #11
+ %25 = call i32 @llvm.amdgcn.workitem.id.x() #11
%cmp.i.i.i79 = icmp ult i32 %25, %24
- call void @llvm.assume(i1 %cmp.i.i.i79) #13
+ call void @llvm.assume(i1 %cmp.i.i.i79) #11
br i1 %tobool.not, label %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit, label %_ZN4ompx7mapping12getBlockSizeEb.exit.i64
_ZN4ompx7mapping12getBlockSizeEb.exit.i64: ; preds = %if.end
%26 = load i32, ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, align 8
%cmp.i.i.i63 = icmp eq i32 %24, %26
- call void @llvm.assume(i1 %cmp.i.i.i63) #13
+ call void @llvm.assume(i1 %cmp.i.i.i63) #11
%27 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 1), align 4
%cmp9.i.i.i = icmp eq i32 %27, 0
- call void @llvm.assume(i1 %cmp9.i.i.i) #13
+ call void @llvm.assume(i1 %cmp9.i.i.i) #11
%28 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 2), align 8
%cmp19.i.i.i = icmp eq i32 %28, 0
- call void @llvm.assume(i1 %cmp19.i.i.i) #13
+ call void @llvm.assume(i1 %cmp19.i.i.i) #11
%29 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 3), align 4
%cmp29.i.i.i = icmp eq i32 %29, 1
- call void @llvm.assume(i1 %cmp29.i.i.i) #13
+ call void @llvm.assume(i1 %cmp29.i.i.i) #11
%30 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 4), align 8
%cmp39.i.i.i = icmp eq i32 %30, 1
- call void @llvm.assume(i1 %cmp39.i.i.i) #13
+ call void @llvm.assume(i1 %cmp39.i.i.i) #11
%31 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 0, i32 5), align 4
%cmp49.i.i.i = icmp eq i32 %31, 1
- call void @llvm.assume(i1 %cmp49.i.i.i) #13
+ call void @llvm.assume(i1 %cmp49.i.i.i) #11
%32 = load i32, ptr addrspace(3) getelementptr inbounds (%"struct.(anonymous namespace)::TeamStateTy", ptr addrspace(3) @_ZN12_GLOBAL__N_19TeamStateE, i32 0, i32 1), align 8
%cmp.i.i67 = icmp eq i32 %32, 1
- call void @llvm.assume(i1 %cmp.i.i67) #13
- %33 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !14
+ call void @llvm.assume(i1 %cmp.i.i67) #11
+ %33 = load i32, ptr addrspace(3) @IsSPMDMode, align 4, !tbaa !20
%tobool.i59.i = icmp ne i32 %33, 0
- call void @llvm.assume(i1 %tobool.i59.i) #13
+ call void @llvm.assume(i1 %tobool.i59.i) #11
br label %_ZN14DebugEntryRAIID2Ev.exit250
_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit: ; preds = %if.end
@@ -260,7 +260,7 @@ if.end10: ; preds = %_ZN4ompx7mapping23i
br i1 %or.cond251, label %do.body.i, label %_ZN14DebugEntryRAIID2Ev.exit250
do.body.i: ; preds = %if.end10
- call void @llvm.amdgcn.s.barrier() #13
+ call void @llvm.amdgcn.s.barrier() #11
br label %_ZN14DebugEntryRAIID2Ev.exit250
_ZN14DebugEntryRAIID2Ev.exit250: ; preds = %do.body.i, %if.end10, %_ZN4ompx7mapping23isInitialThreadInLevel0Eb.exit, %_ZN4ompx7mapping12getBlockSizeEb.exit.i64
@@ -269,65 +269,60 @@ _ZN14DebugEntryRAIID2Ev.exit250: ; preds = %do.body.i, %if.end1
}
; Function Attrs: nounwind
-define internal void @__kmpc_target_deinit() local_unnamed_addr #10 {
+define internal void @__kmpc_target_deinit() local_unnamed_addr #9 {
ret void
}
-; Function Attrs: convergent nounwind
-declare void @__kmpc_parallel_51(ptr nocapture noundef readnone %ident, i32 noundef %0, i32 noundef %if_expr, i32 noundef %num_threads, i32 noundef %proc_bind, ptr noundef %fn, ptr noundef %wrapper_fn, ptr noundef %args, i64 noundef %nargs)
+declare void @__kmpc_parallel_51(ptr noundef readnone captures(none), i32 noundef, i32 noundef, i32 noundef, i32 noundef, ptr noundef, ptr noundef, ptr noundef, i64 noundef)
-; Function Attrs: argmemonly nofree nounwind willreturn
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #12
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #10
attributes #0 = { alwaysinline convergent norecurse nounwind "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn }
-attributes #2 = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #3 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #4 = { nounwind readnone speculatable willreturn }
-attributes #5 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
-attributes #6 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn }
-attributes #7 = { convergent nounwind willreturn }
-attributes #8 = { convergent mustprogress noinline nounwind willreturn "frame-pointer"="none" "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #9 = { convergent nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #10 = { nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #11 = { convergent nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
-attributes #12 = { argmemonly nofree nounwind willreturn }
-attributes #13 = { nounwind }
-attributes #14 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm" }
-attributes #15 = { convergent nounwind }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
+attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
+attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #5 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+attributes #6 = { convergent nocallback nofree nounwind willreturn }
+attributes #7 = { convergent mustprogress noinline nounwind willreturn "frame-pointer"="none" "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
+attributes #8 = { convergent nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
+attributes #9 = { nounwind "frame-pointer"="none" "llvm.assume"="ompx_no_call_asm,ompx_no_call_asm" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx908" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" }
+attributes #10 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+attributes #11 = { nounwind }
+attributes #12 = { convergent nounwind "llvm.assume"="ompx_aligned_barrier,ompx_no_call_asm" }
!omp_offload.info = !{!0}
-!llvm.module.flags = !{!2, !3, !4, !5}
-!llvm.ident = !{!6}
+!llvm.module.flags = !{!1, !2, !3, !4}
+!llvm.ident = !{!5}
!0 = !{i32 0, i32 32, i32 18757968, !"main", i32 12, i32 0}
-!2 = !{i32 1, !"wchar_size", i32 4}
-!3 = !{i32 7, !"openmp", i32 50}
-!4 = !{i32 7, !"openmp-device", i32 50}
-!5 = !{i32 7, !"PIC Level", i32 2}
-!6 = !{!"clang version 15.0.0"}
-!7 = !{!8, !8, i64 0}
-!8 = !{!"any pointer", !9, i64 0}
-!9 = !{!"omnipotent char", !10, i64 0}
-!10 = !{!"Simple C/C++ TBAA"}
-!11 = !{i32 0, i32 1024}
-!12 = !{}
-!13 = !{i16 1, i16 1025}
-!14 = !{!15, !15, i64 0}
-!15 = !{!"int", !16, i64 0}
-!16 = !{!"omnipotent char", !17, i64 0}
-!17 = !{!"Simple C++ TBAA"}
-!18 = !{!16, !16, i64 0}
-!19 = !{!20, !15, i64 0}
-!20 = !{!"_ZTSN12_GLOBAL__N_111TeamStateTyE", !21, i64 0, !15, i64 24, !22, i64 32}
-!21 = !{!"_ZTSN12_GLOBAL__N_110ICVStateTyE", !15, i64 0, !15, i64 4, !15, i64 8, !15, i64 12, !15, i64 16, !15, i64 20}
-!22 = !{!"any pointer", !16, i64 0}
-!23 = !{!20, !15, i64 4}
-!24 = !{!20, !15, i64 8}
-!25 = !{!20, !15, i64 12}
-!26 = !{!20, !15, i64 16}
-!27 = !{!20, !15, i64 20}
-!28 = !{!20, !15, i64 24}
-!29 = !{!20, !22, i64 32}
-!30 = !{!22, !22, i64 0}
-!31 = !{!"branch_weights", i32 2000, i32 1}
+!1 = !{i32 1, !"wchar_size", i32 4}
+!2 = !{i32 7, !"openmp", i32 50}
+!3 = !{i32 7, !"openmp-device", i32 50}
+!4 = !{i32 8, !"PIC Level", i32 2}
+!5 = !{!"clang version 15.0.0"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"any pointer", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!11, !13, i64 24}
+!11 = !{!"_ZTSN12_GLOBAL__N_111TeamStateTyE", !12, i64 0, !13, i64 24, !16, i64 32}
+!12 = !{!"_ZTSN12_GLOBAL__N_110ICVStateTyE", !13, i64 0, !13, i64 4, !13, i64 8, !13, i64 12, !13, i64 16, !13, i64 20}
+!13 = !{!"int", !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C++ TBAA"}
+!16 = !{!"any pointer", !14, i64 0}
+!17 = !{i32 0, i32 1024}
+!18 = !{}
+!19 = !{i16 1, i16 1025}
+!20 = !{!13, !13, i64 0}
+!21 = !{!14, !14, i64 0}
+!22 = !{!11, !13, i64 0}
+!23 = !{!11, !13, i64 4}
+!24 = !{!11, !13, i64 8}
+!25 = !{!11, !13, i64 12}
+!26 = !{!11, !13, i64 16}
+!27 = !{!11, !13, i64 20}
+!28 = !{!11, !16, i64 32}
+!29 = !{!16, !16, i64 0}
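For readers skimming the regenerated checks, the idiom every updated test now encodes reduces to a short standalone sketch. This is not taken from the patch itself; it assumes the amdgcn-amd-amdhsa triple and an "A5" datalayout, where the alloca address space is 5. The stack object is created in addrspace(5) and addrspacecast to a generic `ptr` before it escapes:

; Minimal sketch (illustrative, not from the patch): a verifier-clean
; AMDGPU alloca pattern.
target datalayout = "A5"
target triple = "amdgcn-amd-amdhsa"

declare void @use(ptr)

define void @example() {
entry:
  ; The alloca must live in the target's alloca address space (5 on AMDGPU).
  %x = alloca i32, align 4, addrspace(5)
  ; Cast to a generic pointer before handing the address to a callee that
  ; expects a plain `ptr`.
  %x.cast = addrspacecast ptr addrspace(5) %x to ptr
  call void @use(ptr %x.cast)
  ret void
}

This is why each regenerated check below pairs an addrspace(5) alloca with an explicit cast: an alloca left in the default address space on this target no longer passes the verifier.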
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index ef8caf48e57b7..3c3e1d78a00c1 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -3,8 +3,8 @@
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
%struct.ident_t = type { i32, i32, i32, i32, ptr }
-%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
+%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
@@ -13,6 +13,7 @@
@spmd_and_non_spmd_callees_metadata_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
@spmd_and_non_spmd_callee_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
+; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
@@ -47,8 +48,10 @@ define internal void @spmd_callees__debug(i1 %c) {
; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees__debug
; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -56,18 +59,18 @@ define internal void @spmd_callees__debug(i1 %c) {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: br label [[TMP7:%.*]]
; AMDGPU: 4:
; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: br label [[TMP7]]
; AMDGPU: 6:
; AMDGPU-NEXT: unreachable
@@ -78,8 +81,10 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX-LABEL: define {{[^@]+}}@spmd_callees__debug
; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -87,18 +92,18 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10:[0-9]+]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12:![0-9]+]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable2
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable2(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: br label [[TMP7:%.*]]
; NVPTX: 4:
; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable1(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: br label [[TMP7]]
; NVPTX: 6:
; NVPTX-NEXT: unreachable
@@ -107,33 +112,33 @@ define internal void @spmd_callees__debug(i1 %c) {
; NVPTX-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @spmd_callees_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
%fp = select i1 %c, ptr @__omp_outlined_spmd_amenable1, ptr @__omp_outlined_spmd_amenable2
- call void %fp(ptr %.threadid_temp., ptr %.zero.addr) #6
+ call void %fp(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #8
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -144,14 +149,15 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable1
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -162,12 +168,13 @@ define internal void @__omp_outlined_spmd_amenable1(ptr noalias %.global_tid., p
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
br label %for.cond
for.cond: ; preds = %for.body, %entry
@@ -176,20 +183,17 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #4
ret void
for.body: ; preds = %for.cond
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !22
+ br label %for.cond, !llvm.loop !16
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -203,54 +207,59 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #5
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
-;
-;
+define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__1(ptr %.addr1.cast, ptr %.zero.addr.cast) #8
ret void
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
; AMDGPU-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR6]]
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
@@ -263,7 +272,7 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
@@ -271,7 +280,8 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR6]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
@@ -283,14 +293,15 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [0 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
- call void @use(ptr nocapture %x) #10
+ call void @use(ptr captures(none) %x) #4
br label %for.cond
for.cond: ; preds = %for.body, %entry
@@ -299,20 +310,18 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #4
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs.cast, i64 0)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !25
+ br label %for.cond, !llvm.loop !19
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
+
define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -326,56 +335,62 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #5
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
-;
-;
+define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__3(ptr %.addr1.cast, ptr %.zero.addr.cast) #8
ret void
}
-
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee
; AMDGPU-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -412,18 +427,18 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: br label [[TMP7:%.*]]
; AMDGPU: 4:
; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; AMDGPU-NEXT: br label [[TMP7]]
; AMDGPU: 6:
; AMDGPU-NEXT: unreachable
@@ -435,8 +450,10 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -472,18 +489,18 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[FP:%.*]] = select i1 [[C]], ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_not_spmd_amenable
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: br label [[TMP7:%.*]]
; NVPTX: 4:
; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable3(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]]) #[[ATTR10]]
; NVPTX-NEXT: br label [[TMP7]]
; NVPTX: 6:
; NVPTX-NEXT: unreachable
@@ -492,33 +509,33 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callee_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
%fp = select i1 %c, ptr @__omp_outlined_spmd_amenable3, ptr @__omp_outlined_not_spmd_amenable
- call void %fp(ptr %.threadid_temp., ptr %.zero.addr) #6
+ call void %fp(ptr %.threadid_temp..cast, ptr %.zero.addr.cast) #8
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; AMDGPU-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]]
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
@@ -530,16 +547,17 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; AMDGPU-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
-; AMDGPU-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; AMDGPU-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable3
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR10]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
@@ -551,14 +569,15 @@ define internal void @__omp_outlined_spmd_amenable3(ptr noalias %.global_tid., p
; NVPTX-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR10]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
-; NVPTX-NEXT: store ptr [[X]], ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
+; NVPTX-NEXT: store ptr [[X]], ptr addrspace(5) [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA20:![0-9]+]]
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS_CAST]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
- %captured_vars_addrs = alloca [1 x ptr], align 8
+ %captured_vars_addrs = alloca ptr, align 8, addrspace(5)
+ %captured_vars_addrs.cast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr
%x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
br label %for.cond
@@ -568,22 +587,19 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #4
call void @__kmpc_free_shared(ptr %x, i64 4)
ret void
for.body: ; preds = %for.cond
- store ptr %x, ptr %captured_vars_addrs, align 8, !tbaa !26
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
- call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs, i64 1)
+ store ptr %x, ptr %captured_vars_addrs.cast, align 8, !tbaa !20
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
+ call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr %captured_vars_addrs.cast, i64 1)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !28
+ br label %for.cond, !llvm.loop !22
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -603,63 +619,70 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias %
; NVPTX-NEXT: ret void
;
entry:
- %0 = load i32, ptr %x, align 4, !tbaa !18
+ %0 = load i32, ptr %x, align 4, !tbaa !12
%inc = add nsw i32 %0, 1
- store i32 %inc, ptr %x, align 4, !tbaa !18
- call void @unknown() #11
+ store i32 %inc, ptr %x, align 4, !tbaa !12
+ call void @unknown() #5
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
-;
-;
+define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTADDR1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[GLOBAL_ARGS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_CAST]])
+; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA20]]
-; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1_CAST]], ptr [[DOTZERO_ADDR_CAST]], ptr [[TMP3]]) #[[ATTR10]]
; NVPTX-NEXT: ret void
;
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- %2 = load ptr, ptr %global_args, align 8
- %3 = load ptr, ptr %2, align 8, !tbaa !26
- call void @__omp_outlined__5(ptr %.addr1, ptr %.zero.addr, ptr %3) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ %2 = load ptr, ptr %global_args.cast, align 8
+ %3 = load ptr, ptr %2, align 8, !tbaa !20
+ call void @__omp_outlined__5(ptr %.addr1.cast, ptr %.zero.addr.cast, ptr %3) #8
ret void
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_callees_metadata
; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -667,17 +690,19 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; AMDGPU-NEXT: call void @__kmpc_target_deinit()
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@spmd_callees_metadata
; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
@@ -685,41 +710,43 @@ define weak ptx_kernel void @spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; NVPTX-NEXT: call void @__kmpc_target_deinit()
; NVPTX-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @spmd_callees_metadata_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void %fp(ptr %.threadid_temp., ptr %.zero.addr), !callees !31
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void %fp(ptr %.threadid_temp..cast, ptr %.zero.addr.cast), !callees !23
call void @__kmpc_target_deinit()
br label %common.ret
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata
; AMDGPU-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; AMDGPU-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -756,17 +783,17 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; AMDGPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; AMDGPU-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; AMDGPU-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; AMDGPU-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; AMDGPU-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AMDGPU: 3:
-; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; AMDGPU-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; AMDGPU-NEXT: br label [[TMP7:%.*]]
; AMDGPU: 4:
; AMDGPU-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; AMDGPU: 5:
-; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; AMDGPU-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; AMDGPU-NEXT: br label [[TMP7]]
; AMDGPU: 6:
; AMDGPU-NEXT: unreachable
@@ -778,8 +805,10 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
+; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
+; NVPTX-NEXT: [[DOTTHREADID_TEMP__CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -815,17 +844,17 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR10]]
-; NVPTX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
+; NVPTX-NEXT: store i32 0, ptr addrspace(5) [[DOTZERO_ADDR]], align 4
+; NVPTX-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA12]]
; NVPTX-NEXT: [[TMP2:%.*]] = icmp eq ptr [[FP]], @__omp_outlined_spmd_amenable_external
; NVPTX-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; NVPTX: 3:
-; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; NVPTX-NEXT: call void @__omp_outlined_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; NVPTX-NEXT: br label [[TMP7:%.*]]
; NVPTX: 4:
; NVPTX-NEXT: br i1 true, label [[TMP5:%.*]], label [[TMP6:%.*]]
; NVPTX: 5:
-; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]])
+; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable_external(ptr [[DOTTHREADID_TEMP__CAST]], ptr [[DOTZERO_ADDR_CAST]])
; NVPTX-NEXT: br label [[TMP7]]
; NVPTX: 6:
; NVPTX-NEXT: unreachable
@@ -834,28 +863,27 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: br label [[COMMON_RET]]
;
entry:
- %.zero.addr = alloca i32, align 4
- %.threadid_temp. = alloca i32, align 4
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %.threadid_temp. = alloca ptr, align 8, addrspace(5)
+ %.threadid_temp..cast = addrspacecast ptr addrspace(5) %.threadid_temp. to ptr
%0 = call i32 @__kmpc_target_init(ptr @spmd_and_non_spmd_callees_metadata_kernel_environment, ptr null)
%exec_user_code = icmp eq i32 %0, -1
br i1 %exec_user_code, label %user_code.entry, label %common.ret
-common.ret: ; preds = %entry, %user_code.entry
+common.ret: ; preds = %user_code.entry, %entry
ret void
user_code.entry: ; preds = %entry
%1 = call i32 @__kmpc_global_thread_num(ptr @1)
- store i32 0, ptr %.zero.addr, align 4
- store i32 %1, ptr %.threadid_temp., align 4, !tbaa !18
- call void %fp(ptr %.threadid_temp., ptr %.zero.addr), !callees !32
+ store i32 0, ptr %.zero.addr.cast, align 4
+ store i32 %1, ptr %.threadid_temp..cast, align 4, !tbaa !12
+ call void %fp(ptr %.threadid_temp..cast, ptr %.zero.addr.cast), !callees !24
call void @__kmpc_target_deinit()
br label %common.ret
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define void @__omp_outlined_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable_external
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -899,20 +927,17 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
- call void @spmd_amenable() #10
+ call void @spmd_amenable() #4
ret void
for.body: ; preds = %for.cond
- %0 = load i32, ptr %.global_tid., align 4, !tbaa !18
+ %0 = load i32, ptr %.global_tid., align 4, !tbaa !12
call void @__kmpc_parallel_51(ptr @1, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr undef, i64 0)
%inc = add nsw i32 %i.0, 1
- br label %for.cond, !llvm.loop !29
+ br label %for.cond, !llvm.loop !25
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr nonnull align 4 dereferenceable(4) %x) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[X:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -928,9 +953,7 @@ entry:
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
-;
-;
+define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #1 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-NEXT: entry:
@@ -945,7 +968,6 @@ entry:
ret void
}
-; Function Attrs: alwaysinline convergent norecurse nounwind
define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable_external
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
@@ -957,13 +979,11 @@ define void @__omp_outlined_not_spmd_amenable_external(ptr noalias %.global_tid.
; NVPTX-NEXT: call void @__omp_outlined_not_spmd_amenable(ptr [[DOTGLOBAL_TID_]], ptr [[DOTBOUND_TID_]])
; NVPTX-NEXT: ret void
;
- call void @__omp_outlined_not_spmd_amenable(ptr %.global_tid., ptr %.bound_tid.);
+ call void @__omp_outlined_not_spmd_amenable(ptr %.global_tid., ptr %.bound_tid.)
ret void
}
define internal void @__omp_outlined_not_spmd_amenable(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined_not_spmd_amenable
; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; AMDGPU-NEXT: entry:
@@ -977,30 +997,29 @@ define internal void @__omp_outlined_not_spmd_amenable(ptr noalias %.global_tid.
; NVPTX-NEXT: ret void
;
entry:
- call void @unknown() #11
+ call void @unknown() #5
ret void
}
; Function Attrs: nosync nounwind
-declare void @__kmpc_free_shared(ptr nocapture, i64) #8
+declare void @__kmpc_free_shared(ptr captures(none), i64) #2
; Function Attrs: nofree nosync nounwind
-declare ptr @__kmpc_alloc_shared(i64) #7
+declare ptr @__kmpc_alloc_shared(i64) #3
; Function Attrs: convergent
-declare void @use(ptr nocapture) #5
+declare void @use(ptr captures(none)) #4
; Function Attrs: convergent
-declare void @unknown() #2
-declare void @unknowni32p(ptr) #2
+declare void @unknown() #5
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+; Function Attrs: convergent
+declare void @unknowni32p(ptr) #5
-; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
-define weak i32 @__kmpc_target_init(ptr, ptr) {
-;
-;
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #6
+
+define weak i32 @__kmpc_target_init(ptr %0, ptr %1) {
; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init
; AMDGPU-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
; AMDGPU-NEXT: ret i32 0
@@ -1015,62 +1034,56 @@ define weak i32 @__kmpc_target_init(ptr, ptr) {
declare void @__kmpc_get_shared_variables(ptr)
; Function Attrs: alwaysinline
-declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
+declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #7
-; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #6
; Function Attrs: convergent
-declare void @spmd_amenable() #5
+declare void @spmd_amenable() #4
; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(ptr) #6
+declare i32 @__kmpc_global_thread_num(ptr) #8
declare void @__kmpc_target_deinit()
-
-; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
-;
-;
entry:
- call void @unknown() #11
+ call void @unknown() #5
ret void
}
; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
-;
-;
+define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #1 {
entry:
- %.addr1 = alloca i32, align 4
- %.zero.addr = alloca i32, align 4
- %global_args = alloca ptr, align 8
- store i32 %1, ptr %.addr1, align 4, !tbaa !18
- store i32 0, ptr %.zero.addr, align 4
- call void @__kmpc_get_shared_variables(ptr %global_args)
- call void @__omp_outlined__9(ptr %.addr1, ptr %.zero.addr) #6
+ %.addr1 = alloca ptr, align 8, addrspace(5)
+ %.addr1.cast = addrspacecast ptr addrspace(5) %.addr1 to ptr
+ %.zero.addr = alloca ptr, align 8, addrspace(5)
+ %.zero.addr.cast = addrspacecast ptr addrspace(5) %.zero.addr to ptr
+ %global_args = alloca ptr, align 8, addrspace(5)
+ %global_args.cast = addrspacecast ptr addrspace(5) %global_args to ptr
+ store i32 %1, ptr %.addr1.cast, align 4, !tbaa !12
+ store i32 0, ptr %.zero.addr.cast, align 4
+ call void @__kmpc_get_shared_variables(ptr %global_args.cast)
+ call void @__omp_outlined__9(ptr %.addr1.cast, ptr %.zero.addr.cast) #8
ret void
}
-declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();
+declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
attributes #0 = { alwaysinline convergent norecurse nounwind "kernel" }
-attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
-attributes #2 = { convergent }
-attributes #3 = { convergent norecurse nounwind }
-attributes #4 = { alwaysinline }
-attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
-attributes #6 = { nounwind }
-attributes #7 = { nofree nosync nounwind }
-attributes #8 = { nosync nounwind }
-attributes #9 = { alwaysinline convergent nounwind }
-attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
-attributes #11 = { convergent }
+attributes #1 = { convergent norecurse nounwind }
+attributes #2 = { nosync nounwind }
+attributes #3 = { nofree nosync nounwind }
+attributes #4 = { convergent "llvm.assume"="ompx_spmd_amenable" }
+attributes #5 = { convergent }
+attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #7 = { alwaysinline }
+attributes #8 = { nounwind }
!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
-!llvm.module.flags = !{!12, !13, !14, !15, !16}
-!llvm.ident = !{!17}
+!llvm.module.flags = !{!6, !7, !8, !9, !10}
+!llvm.ident = !{!11}
!0 = !{i32 0, i32 64770, i32 541341486, !"", i32 74, i32 5}
!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
@@ -1078,27 +1091,26 @@ attributes #11 = { convergent }
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
-!12 = !{i32 1, !"wchar_size", i32 4}
-!13 = !{i32 7, !"openmp", i32 50}
-!14 = !{i32 7, !"openmp-device", i32 50}
-!15 = !{i32 8, !"PIC Level", i32 2}
-!16 = !{i32 7, !"frame-pointer", i32 2}
-!17 = !{!"clang version 14.0.0"}
-!18 = !{!19, !19, i64 0}
-!19 = !{!"int", !20, i64 0}
-!20 = !{!"omnipotent char", !21, i64 0}
-!21 = !{!"Simple C/C++ TBAA"}
-!22 = distinct !{!22, !23, !24}
-!23 = !{!"llvm.loop.mustprogress"}
-!24 = !{!"llvm.loop.unroll.disable"}
-!25 = distinct !{!25, !23, !24}
-!26 = !{!27, !27, i64 0}
-!27 = !{!"any pointer", !20, i64 0}
-!28 = distinct !{!28, !23, !24}
-!29 = distinct !{!29, !23, !24}
-!30 = !{!31, !27, i64 0}
-!31 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable}
-!32 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable_external}
+!6 = !{i32 1, !"wchar_size", i32 4}
+!7 = !{i32 7, !"openmp", i32 50}
+!8 = !{i32 7, !"openmp-device", i32 50}
+!9 = !{i32 8, !"PIC Level", i32 2}
+!10 = !{i32 7, !"frame-pointer", i32 2}
+!11 = !{!"clang version 14.0.0"}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"int", !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C/C++ TBAA"}
+!16 = distinct !{!16, !17, !18}
+!17 = !{!"llvm.loop.mustprogress"}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = distinct !{!19, !17, !18}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"any pointer", !14, i64 0}
+!22 = distinct !{!22, !17, !18}
+!23 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable}
+!24 = !{ptr @__omp_outlined_spmd_amenable_external, ptr @__omp_outlined_not_spmd_amenable_external}
+!25 = distinct !{!25, !17, !18}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "kernel" }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
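The rewrite applied throughout the OpenMP tests above is mechanical: each flat `alloca` becomes an `alloca ... addrspace(5)` plus an `addrspacecast` back to a flat pointer, direct loads/stores stay on the addrspace(5) pointer, and only flat users (calls, escaping pointers) go through the cast. A minimal sketch of the pattern; the callee @use and the value names are illustrative, not taken from the patch:

target triple = "amdgcn-amd-amdhsa"
target datalayout = "A5"

declare void @use(ptr)

define void @rewritten() {
entry:
  ; was: %x = alloca i32, align 4 -- now a verifier error on AMDGPU
  %x = alloca i32, align 4, addrspace(5)
  %x.cast = addrspacecast ptr addrspace(5) %x to ptr
  store i32 0, ptr addrspace(5) %x, align 4   ; direct access keeps addrspace(5)
  call void @use(ptr %x.cast)                 ; flat users go through the cast
  ret void
}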
diff --git a/llvm/test/Verifier/AMDGPU/alloca.ll b/llvm/test/Verifier/AMDGPU/alloca.ll
new file mode 100644
index 0000000000000..7daf05d9e6b38
--- /dev/null
+++ b/llvm/test/Verifier/AMDGPU/alloca.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as %s --disable-output 2>&1 | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+target datalayout = "A5"
+
+; CHECK: alloca on amdgpu must be in addrspace(5)
+; CHECK-NEXT: %alloca = alloca i32, align 4
+; CHECK: alloca on amdgpu must be in addrspace(5)
+; CHECK-NEXT: %alloca.0 = alloca i32, i32 4, align 4
+define void @foo() {
+entry:
+ %alloca = alloca i32, align 4
+ %alloca.0 = alloca i32, i32 4, align 4
+ ret void
+}
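For contrast, a sketch (not part of the patch) of the addrspace(5) forms the new check accepts: with `A5` in the datalayout, DL.getAllocaAddrSpace() is 5, so both allocas below assemble cleanly under llvm-as:

target triple = "amdgcn-amd-amdhsa"
target datalayout = "A5"

define void @foo() {
entry:
  %alloca = alloca i32, align 4, addrspace(5)
  %alloca.0 = alloca i32, i32 4, align 4, addrspace(5)
  ret void
}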
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index de965f99fd4a1..ee6935d749614 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -33,8 +33,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
llvm.func @parallel_if(%arg0: !llvm.ptr {fir.bindc_name = "ifcond"}) {
%0 = llvm.mlir.constant(1 : i64) : i64
- %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr
- %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
+ %1 = llvm.alloca %0 x i32 {bindc_name = "d"} : (i64) -> !llvm.ptr<5>
+ %cast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %2 = omp.map.info var_ptr(%cast : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"}
%3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"}
omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) {
%4 = llvm.mlir.constant(10 : i32) : i32