[llvm] b5bc205 - AMDGPU: Convert some bit operation tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 15:37:10 PST 2022
Author: Matt Arsenault
Date: 2022-11-29T18:36:53-05:00
New Revision: b5bc205d75ca669c9707792c72c79e3bf0586f89
URL: https://github.com/llvm/llvm-project/commit/b5bc205d75ca669c9707792c72c79e3bf0586f89
DIFF: https://github.com/llvm/llvm-project/commit/b5bc205d75ca669c9707792c72c79e3bf0586f89.diff
LOG: AMDGPU: Convert some bit operation tests to opaque pointers
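The conversion is mechanical: typed pointer arguments such as i32 addrspace(1)* become the opaque ptr addrspace(1), while loads, stores, and getelementptrs keep their value/element types and only drop the pointee type from the pointer operand. A minimal before/after sketch of the pattern (a hypothetical kernel for illustration only, not one of the tests below):

; Typed-pointer form (before the conversion):
;   define amdgpu_kernel void @example(i32 addrspace(1)* %out, i32 %v) {
;     store i32 %v, i32 addrspace(1)* %out, align 4
;     ret void
;   }

; Opaque-pointer form (after the conversion): the parameter type becomes
; "ptr addrspace(1)", and the store still names the stored value type (i32)
; but no longer repeats it on the pointer operand.
define amdgpu_kernel void @example(ptr addrspace(1) %out, i32 %v) {
  store i32 %v, ptr addrspace(1) %out, align 4
  ret void
}
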
Added:
Modified:
llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
llvm/test/CodeGen/AMDGPU/and-gcn.ll
llvm/test/CodeGen/AMDGPU/and.ll
llvm/test/CodeGen/AMDGPU/andorbitset.ll
llvm/test/CodeGen/AMDGPU/andorn2.ll
llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
llvm/test/CodeGen/AMDGPU/bfe_uint.ll
llvm/test/CodeGen/AMDGPU/bfi_int.ll
llvm/test/CodeGen/AMDGPU/bfm.ll
llvm/test/CodeGen/AMDGPU/bitreverse.ll
llvm/test/CodeGen/AMDGPU/bswap.ll
llvm/test/CodeGen/AMDGPU/ctlz.ll
llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/cttz.ll
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
llvm/test/CodeGen/AMDGPU/fshl.ll
llvm/test/CodeGen/AMDGPU/fshr.ll
llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
llvm/test/CodeGen/AMDGPU/nor.ll
llvm/test/CodeGen/AMDGPU/or.ll
llvm/test/CodeGen/AMDGPU/permute.ll
llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
llvm/test/CodeGen/AMDGPU/shift-i128.ll
llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
llvm/test/CodeGen/AMDGPU/shl.ll
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
llvm/test/CodeGen/AMDGPU/sra.ll
llvm/test/CodeGen/AMDGPU/srl.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
index 88ae414eb05f..fb622f2aaff1 100644
--- a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
+++ b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -5,14 +5,14 @@
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
-define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -21,18 +21,18 @@ bb:
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
-define amdgpu_kernel void @alignbit_shr_pat_v(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_v(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep1 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tid
- %tmp = load i64, i64 addrspace(1)* %gep1, align 8
- %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tid
- %amt = load i32, i32 addrspace(1)* %gep2, align 4
+ %gep1 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
+ %tmp = load i64, ptr addrspace(1) %gep1, align 8
+ %gep2 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tid
+ %amt = load i32, ptr addrspace(1) %gep2, align 4
%tmp3 = and i32 %amt, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %gep2, align 4
+ store i32 %tmp6, ptr addrspace(1) %gep2, align 4
ret void
}
@@ -41,14 +41,14 @@ bb:
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 30
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -57,14 +57,14 @@ bb:
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 63
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -72,12 +72,12 @@ bb:
; GCN: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
-define amdgpu_kernel void @alignbit_shr_pat_const30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_const30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp5 = lshr i64 %tmp, 30
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -86,12 +86,12 @@ bb:
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp5 = lshr i64 %tmp, 33
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/and-gcn.ll b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
index ef11ae87267e..07458fcdbdd9 100644
--- a/llvm/test/CodeGen/AMDGPU/and-gcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
@@ -3,21 +3,21 @@
; FUNC-LABEL: {{^}}v_and_i64_br:
; SI: s_and_b64
-define amdgpu_kernel void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i64_br(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %if, label %endif
if:
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
+ %b = load i64, ptr addrspace(1) %bptr, align 8
%and = and i64 %a, %b
br label %endif
endif:
%tmp1 = phi i64 [%and, %if], [0, %entry]
- store i64 %tmp1, i64 addrspace(1)* %out, align 8
+ store i64 %tmp1, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index 78aa2cc01c6d..8ca046688be5 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -11,12 +11,12 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
-define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = and <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -32,28 +32,28 @@ define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
-define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = and <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}s_and_i32:
; SI: s_and_b32
-define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
-define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @s_and_constant_i32(ptr addrspace(1) %out, i32 %a) {
%and = and i32 %a, 1234567
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
@@ -66,13 +66,13 @@ define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x12d687
; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x12d687
; SI: buffer_store_dword [[VK]]
-define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i32_0(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
; Just to stop future replacement of copy to vgpr + store with VALU op.
%foo = add i32 %and, %b
- store volatile i32 %foo, i32 addrspace(1)* %out
- store volatile i32 1234567, i32 addrspace(1)* %out
+ store volatile i32 %foo, ptr addrspace(1) %out
+ store volatile i32 1234567, ptr addrspace(1) %out
ret void
}
@@ -83,25 +83,25 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, 0x12d687
; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[VADD]]
-define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i32_1(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
%foo = add i32 %and, 1234567
%bar = add i32 %foo, %b
- store volatile i32 %bar, i32 addrspace(1)* %out
+ store volatile i32 %bar, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i32_vgpr_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep.a
- %b = load i32, i32 addrspace(1)* %gep.b
+ %gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep.a
+ %b = load i32, ptr addrspace(1) %gep.b
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
@@ -109,13 +109,13 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrs
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
-define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i32_sgpr_vgpr(ptr addrspace(1) %out, i32 %a, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.b
+ %gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %b = load i32, ptr addrspace(1) %gep.b
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
@@ -123,54 +123,54 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
-define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
+define amdgpu_kernel void @v_and_i32_vgpr_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i32 %b) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep.a
+ %gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep.a
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_constant_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 1234567
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 64
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, -16
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}s_and_i64
; SI: s_and_b64
-define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_and_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -181,9 +181,9 @@ define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: s_and_b32 [[AND_TRUNC:s[0-9]+]], [[AND]], 1{{$}}
; SI: v_mov_b32_e32 [[V_AND_TRUNC:v[0-9]+]], [[AND_TRUNC]]
; SI: buffer_store_byte [[V_AND_TRUNC]]
-define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
+define amdgpu_kernel void @s_and_i1(ptr addrspace(1) %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
- store i1 %and, i1 addrspace(1)* %out
+ store i1 %and, ptr addrspace(1) %out
ret void
}
@@ -191,9 +191,9 @@ define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_and_constant_i64(ptr addrspace(1) %out, i64 %a) {
%and = and i64 %a, 549756338176
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -201,11 +201,11 @@ define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[KLO]]:[[KHI]]]
-define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%and0 = and i64 %a, 549756338176
%and1 = and i64 %b, 549756338176
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -215,9 +215,9 @@ define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i32, i64 %a) {
+define amdgpu_kernel void @s_and_32_bit_constant_i64(ptr addrspace(1) %out, i32, i64 %a) {
%and = and i64 %a, 1234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -233,29 +233,29 @@ define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i32
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
+define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(ptr addrspace(1) %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
%shl.a = shl i64 %a, 1
%shl.b = shl i64 %b, 1
%and0 = and i64 %shl.a, 62
%and1 = and i64 %shl.b, 62
%add0 = add i64 %and0, %c
%add1 = add i64 %and1, %c
- store volatile i64 %add0, i64 addrspace(1)* %out
- store volatile i64 %add1, i64 addrspace(1)* %out
+ store volatile i64 %add0, ptr addrspace(1) %out
+ store volatile i64 %add1, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_and_i64:
; SI: v_and_b32
; SI: v_and_b32
-define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
- %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
- %b = load i64, i64 addrspace(1)* %gep.b, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
+ %gep.b = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
+ %b = load i64, ptr addrspace(1) %gep.b, align 8
%and = and i64 %a, %b
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -263,12 +263,12 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1231231234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -281,13 +281,13 @@ define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrsp
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, v[[HI1]]
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load volatile i64, i64 addrspace(1)* %aptr
- %b = load volatile i64, i64 addrspace(1)* %aptr
+define amdgpu_kernel void @v_and_multi_use_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
+ %a = load volatile i64, ptr addrspace(1) %aptr
+ %b = load volatile i64, ptr addrspace(1) %aptr
%and0 = and i64 %a, 1231231234567
%and1 = and i64 %b, 1231231234567
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -301,13 +301,13 @@ define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out,
; SI-NOT: and
; SI: buffer_store_dwordx2 v[[[RESLO0]]
; SI: buffer_store_dwordx2 v[[[RESLO1]]
-define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load volatile i64, i64 addrspace(1)* %aptr
- %b = load volatile i64, i64 addrspace(1)* %aptr
+define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
+ %a = load volatile i64, ptr addrspace(1) %aptr
+ %b = load volatile i64, ptr addrspace(1) %aptr
%and0 = and i64 %a, 63
%and1 = and i64 %b, 63
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -317,12 +317,12 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_i64_32_bit_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -332,12 +332,12 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 64
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -348,12 +348,12 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v[[[VAL_LO]]:[[VAL_HI]]]
-define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_neg_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, -8
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -363,9 +363,9 @@ define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
; SI-NOT: and
; SI: buffer_store_dword
-define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 64
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -377,11 +377,11 @@ define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
-define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i32, i64 %b) {
+define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a, i32, i64 %b) {
%shl = shl i64 %a, 1
%and = and i64 %shl, 64
%add = add i64 %and, %b
- store i64 %add, i64 addrspace(1)* %out, align 8
+ store i64 %add, ptr addrspace(1) %out, align 8
ret void
}
@@ -391,9 +391,9 @@ define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %o
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 1
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -406,9 +406,9 @@ define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 ad
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_1.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4607182418800017408
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -421,9 +421,9 @@ define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13830554455654793216
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -436,9 +436,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_0.5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4602678819172646912
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -451,9 +451,9 @@ define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13826050856027422720
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -464,9 +464,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_2.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4611686018427387904
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -477,9 +477,9 @@ define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13835058055282163712
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -492,9 +492,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4616189618054758400
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -507,9 +507,9 @@ define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13839561654909534208
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -524,9 +524,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 1082130432
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -537,9 +537,9 @@ define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, -1065353216
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -550,9 +550,9 @@ define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %o
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4647714815446351872
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -563,9 +563,9 @@ define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13871086852301127680
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index 196e813c71a7..a7d61582598f 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -2,49 +2,49 @@
; SI-LABEL: {{^}}s_clear_msb:
; SI: s_bitset0_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_clear_msb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 2147483647
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_msb:
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_set_msb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 2147483648
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_clear_lsb:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, -2
-define amdgpu_kernel void @s_clear_lsb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 4294967294
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_lsb:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
-define amdgpu_kernel void @s_set_lsb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 1
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_clear_midbit:
; SI: s_bitset0_b32 s{{[0-9]+}}, 8
-define amdgpu_kernel void @s_clear_midbit(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 4294967039
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_midbit:
; SI: s_bitset1_b32 s{{[0-9]+}}, 8
-define amdgpu_kernel void @s_set_midbit(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 256
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
@@ -58,7 +58,7 @@ define void @bitset_verifier_error() local_unnamed_addr #0 {
bb:
%i = call float @llvm.fabs.f32(float undef) #0
%i1 = bitcast float %i to i32
- store i32 %i1, i32 addrspace(1)* @gv
+ store i32 %i1, ptr addrspace(1) @gv
br label %bb2
bb2:
diff --git a/llvm/test/CodeGen/AMDGPU/andorn2.ll b/llvm/test/CodeGen/AMDGPU/andorn2.ll
index cf642155ecef..845ff782eddd 100644
--- a/llvm/test/CodeGen/AMDGPU/andorn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorn2.ll
@@ -6,44 +6,44 @@
; GCN-LABEL: {{^}}scalar_andn2_i32_one_use
; GCN: s_andn2_b32
define amdgpu_kernel void @scalar_andn2_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%nb = xor i32 %b, -1
%r0.val = and i32 %a, %nb
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_andn2_i64_one_use
; GCN: s_andn2_b64
define amdgpu_kernel void @scalar_andn2_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%nb = xor i64 %b, -1
%r0.val = and i64 %a, %nb
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_orn2_i32_one_use
; GCN: s_orn2_b32
define amdgpu_kernel void @scalar_orn2_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%nb = xor i32 %b, -1
%r0.val = or i32 %a, %nb
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_orn2_i64_one_use
; GCN: s_orn2_b64
define amdgpu_kernel void @scalar_orn2_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%nb = xor i64 %b, -1
%r0.val = or i64 %a, %nb
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -51,12 +51,12 @@ entry:
; GCN: v_not_b32
; GCN: v_and_b32
define amdgpu_kernel void @vector_andn2_i32_s_v_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %v, -1
%r0.val = and i32 %s, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -64,12 +64,12 @@ entry:
; GCN: s_not_b32
; GCN: v_and_b32
define amdgpu_kernel void @vector_andn2_i32_v_s_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %s, -1
%r0.val = and i32 %v, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -77,12 +77,12 @@ entry:
; GCN: v_not_b32
; GCN: v_or_b32
define amdgpu_kernel void @vector_orn2_i32_s_v_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %v, -1
%r0.val = or i32 %s, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -90,12 +90,12 @@ entry:
; GCN: s_not_b32
; GCN: v_or_b32
define amdgpu_kernel void @vector_orn2_i32_v_s_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %s, -1
%r0.val = or i32 %v, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 654aac83b9cc..e909cf785026 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -22,9 +22,9 @@
; CIVI: s_and_b32
; CIVI: s_or_b32
-define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_ashr_v2i16(ptr addrspace(1) %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) #0 {
%result = ashr <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
@@ -44,16 +44,16 @@ define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, i32, <2 x
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -61,14 +61,14 @@ define amdgpu_kernel void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -76,42 +76,42 @@ define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
; GCN-LABEL: {{^}}ashr_imm_v_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4
-define amdgpu_kernel void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> <i16 -4, i16 -4>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
; GCN-LABEL: {{^}}ashr_v_imm_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]]
-define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -128,16 +128,16 @@ define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -146,14 +146,14 @@ define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = ashr <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index 1c72c08a34c7..2ac65f8a1618 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -40,19 +40,19 @@ define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -99,20 +99,20 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -144,15 +144,15 @@ define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -191,16 +191,16 @@ define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -238,19 +238,19 @@ define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -297,20 +297,20 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -342,15 +342,15 @@ define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -389,16 +389,16 @@ define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: s_sbfe_or_shl_shl_uniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -430,18 +430,18 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(i32 addrspace(1)* %out,
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %in0
- %b0 = load i32, i32 addrspace(1) * %in1
+ %a0 = load i32, ptr addrspace(1) %in0
+ %b0 = load i32, ptr addrspace(1) %in1
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 17
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; TODO ashr(or(shl(x,c1),shl(y,c2)),c1) -> sign_extend_inreg(or(x,shl(y,c2-c1))) iff c2 >= c1
-define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x, i32 addrspace(1)* %y) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_nonuniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -477,18 +477,18 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(i32 addrspace(1)* %o
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %x
- %b0 = load i32, i32 addrspace(1) * %y
+ %a0 = load i32, ptr addrspace(1) %x
+ %b0 = load i32, ptr addrspace(1) %y
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 19
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; Don't fold as 'other shl' amount is less than the sign_extend_inreg type.
-define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x, i32 addrspace(1)* %y) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_toosmall_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -524,13 +524,13 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(i32 addrspace(1)* %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %x
- %b0 = load i32, i32 addrspace(1) * %y
+ %a0 = load i32, ptr addrspace(1) %x
+ %b0 = load i32, ptr addrspace(1) %y
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 16
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
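For reference, the rule applied mechanically throughout these tests is purely syntactic: the pointee type moves off the pointer type and survives only as the explicit value type on getelementptr, load, and store, so i32 addrspace(1)* becomes ptr addrspace(1) with no other change. A minimal sketch of the converted shape (the function name @opaque_ptr_sketch is illustrative, not a test from the tree):

define amdgpu_kernel void @opaque_ptr_sketch(ptr addrspace(1) %out, i32 %idx) {
  ; was: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 %idx
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 %idx
  ; was: store i32 %idx, i32 addrspace(1)* %gep
  store i32 %idx, ptr addrspace(1) %gep
  ret void
}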
diff --git a/llvm/test/CodeGen/AMDGPU/bfe_uint.ll b/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
index 2c8c9a5ec932..4b91dbdd6ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
@@ -2,11 +2,11 @@
; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
-define amdgpu_kernel void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
+define amdgpu_kernel void @bfe_def(ptr addrspace(1) %out, i32 %x) {
entry:
%0 = lshr i32 %x, 5
%1 = and i32 %0, 15 ; 0xf
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -17,10 +17,10 @@ entry:
; CHECK: {{^}}bfe_shift:
; CHECK-NOT: BFE_UINT
-define amdgpu_kernel void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
+define amdgpu_kernel void @bfe_shift(ptr addrspace(1) %out, i32 %x) {
entry:
%0 = lshr i32 %x, 16
%1 = and i32 %0, 65535 ; 0xffff
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index 6c0a183289e1..445bc7f586ec 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -8,7 +8,7 @@
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -85,7 +85,7 @@ entry:
%1 = and i32 %z, %0
%2 = and i32 %y, %x
%3 = or i32 %1, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -131,7 +131,7 @@ entry:
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -207,7 +207,7 @@ entry:
%0 = xor i32 %y, %z
%1 = and i32 %x, %0
%2 = xor i32 %z, %1
- store i32 %2, i32 addrspace(1)* %out
+ store i32 %2, ptr addrspace(1) %out
ret void
}
@@ -469,7 +469,7 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -551,7 +551,7 @@ entry:
%1 = or i32 %x, %z
%2 = and i32 %y, %1
%3 = or i32 %0, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -1633,7 +1633,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
%and1 = and i64 %not.a, %mask
%bitselect = or i64 %and0, %and1
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1721,7 +1721,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1809,7 +1809,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1904,6 +1904,6 @@ entry:
%or1 = or i64 %and0, %and1
%scalar.use = add i64 %or1, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
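The bfi_int.ll hunks all reduce to the bitfield-insert identity quoted from the ISA docs, (y & x) | (z & ~x), plus its SHA-256 Ch and Ma rearrangements. A compact, self-contained sketch of that core shape in opaque-pointer form (@bfi_pattern_sketch is an illustrative name only):

define amdgpu_kernel void @bfi_pattern_sketch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
entry:
  %not.x = xor i32 %x, -1        ; ~x
  %sel.y = and i32 %y, %x        ; bits taken from y where x is set
  %sel.z = and i32 %z, %not.x    ; bits taken from z where x is clear
  %bfi   = or i32 %sel.y, %sel.z ; (y & x) | (z & ~x)
  store i32 %bfi, ptr addrspace(1) %out
  ret void
}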
diff --git a/llvm/test/CodeGen/AMDGPU/bfm.ll b/llvm/test/CodeGen/AMDGPU/bfm.ll
index 2c9c692c42a4..789a64220e62 100644
--- a/llvm/test/CodeGen/AMDGPU/bfm.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfm.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: s_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -29,11 +29,11 @@ define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y)
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
- store i32 %c, i32 addrspace(1)* %out
+ store i32 %c, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: s_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -59,11 +59,11 @@ define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x)
; VI-NEXT: s_endpgm
%a = shl i32 1, %x
%b = sub i32 %a, 1
- store i32 %b, i32 addrspace(1)* %out
+ store i32 %b, ptr addrspace(1) %out
ret void
}
-define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: v_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,11 +86,11 @@ define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
- store i32 %c, i32 addrspace(1)* %out
+ store i32 %c, ptr addrspace(1) %out
ret void
}
-define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: v_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -112,7 +112,7 @@ define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
; VI-NEXT: s_setpc_b64 s[30:31]
%a = shl i32 1, %x
%b = sub i32 %a, 1
- store i32 %b, i32 addrspace(1)* %out
+ store i32 %b, ptr addrspace(1) %out
ret void
}
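The bfm.ll pattern builds a bitfield mask from a width %x and an offset %y. Restating it with a numeric trace in the comments (the values and the name @bfm_sketch are chosen for illustration, not taken from the test):

define amdgpu_kernel void @bfm_sketch(ptr addrspace(1) %out, i32 %x, i32 %y) {
  %ones  = shl i32 1, %x      ; x = 4 -> 0x10
  %mask  = sub i32 %ones, 1   ;          0x0f, i.e. x low bits set
  %field = shl i32 %mask, %y  ; y = 8 -> 0xf00, the mask placed at bit y
  store i32 %field, ptr addrspace(1) %out
  ret void
}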
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 79c95d930a4f..3090b4b14709 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -16,7 +16,7 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
-define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
+define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #0 {
; SI-LABEL: s_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -57,11 +57,11 @@ define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val)
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
- store i16 %brev, i16 addrspace(1)* %out
+ store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -114,13 +114,13 @@ define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrsp
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
- store i16 %brev, i16 addrspace(1)* %out
+ store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
+define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) #0 {
; SI-LABEL: s_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -157,11 +157,11 @@ define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GISEL-NEXT: flat_store_dword v[0:1], v2
; GISEL-NEXT: s_endpgm
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
- store i32 %brev, i32 addrspace(1)* %out
+ store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -214,14 +214,14 @@ define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GISEL-NEXT: flat_store_dword v[0:1], v2
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %gep
+ %gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %gep
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
- store i32 %brev, i32 addrspace(1)* %out
+ store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i32(ptr addrspace(1) noalias %out, <2 x i32> %val) #0 {
; SI-LABEL: s_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -265,11 +265,11 @@ define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
- store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -325,14 +325,14 @@ define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep
+ %gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %gep
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
- store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
+define amdgpu_kernel void @s_brev_i64(ptr addrspace(1) noalias %out, i64 %val) #0 {
; SI-LABEL: s_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -373,11 +373,11 @@ define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val)
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
- store i64 %brev, i64 addrspace(1)* %out
+ store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -433,14 +433,14 @@ define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i64, i64 addrspace(1)* %valptr, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep
+ %gep = getelementptr i64, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i64, ptr addrspace(1) %gep
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
- store i64 %brev, i64 addrspace(1)* %out
+ store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64> %val) #0 {
; SI-LABEL: s_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -489,11 +489,11 @@ define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GISEL-NEXT: s_endpgm
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
- store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -555,10 +555,10 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x i64> , <2 x i64> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %gep
+ %gep = getelementptr <2 x i64> , ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i64>, ptr addrspace(1) %gep
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
- store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
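llvm.bitreverse mirrors the entire bit string of its operand; the i64 form is equivalent to reversing each 32-bit half and swapping the halves, which is why the wider cases above work dword by dword. A self-contained i32 example in opaque-pointer form (names are illustrative, not taken from the test):

declare i32 @llvm.bitreverse.i32(i32)

define amdgpu_kernel void @brev_sketch(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %val  = load i32, ptr addrspace(1) %in, align 4
  %brev = call i32 @llvm.bitreverse.i32(i32 %val)  ; e.g. 0x00000001 -> 0x80000000
  store i32 %brev, ptr addrspace(1) %out, align 4
  ret void
}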
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 368463c1ac5c..7202ca648258 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -15,7 +15,7 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
declare i48 @llvm.bswap.i48(i48) #1
-define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -45,13 +45,13 @@ define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %in, align 4
+ %val = load i32, ptr addrspace(1) %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
- store i32 %bswap, i32 addrspace(1)* %out, align 4
+ store i32 %bswap, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -85,13 +85,13 @@ define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, ptr addrspace(1) %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
- store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %bswap, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -133,13 +133,13 @@ define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; VI-NEXT: v_perm_b32 v0, 0, s8, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, ptr addrspace(1) %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
- store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %bswap, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v8i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -199,13 +199,13 @@ define amdgpu_kernel void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; VI-NEXT: s_endpgm
- %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, ptr addrspace(1) %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
- store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %bswap, ptr addrspace(1) %out, align 32
ret void
}
-define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -239,13 +239,13 @@ define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(
; VI-NEXT: v_perm_b32 v0, 0, s3, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load i64, i64 addrspace(1)* %in, align 8
+ %val = load i64, ptr addrspace(1) %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
- store i64 %bswap, i64 addrspace(1)* %out, align 8
+ store i64 %bswap, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -287,13 +287,13 @@ define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; VI-NEXT: v_perm_b32 v0, 0, s9, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, ptr addrspace(1) %in, align 16
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
- store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
+ store <2 x i64> %bswap, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -353,9 +353,9 @@ define amdgpu_kernel void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; VI-NEXT: s_endpgm
- %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, ptr addrspace(1) %in, align 32
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
- store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
+ store <4 x i64> %bswap, ptr addrspace(1) %out, align 32
ret void
}
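Byte swap has the analogous decomposition: bswap of an i64 equals the bswap of each 32-bit half with the halves exchanged, which is the usual shape of the dword-pair lowering. A sketch of that identity under opaque pointers (all names illustrative, and the expansion is only meant to show the equivalence, not the exact instruction selection):

declare i32 @llvm.bswap.i32(i32)

define amdgpu_kernel void @bswap_i64_sketch(ptr addrspace(1) %out, i64 %v) {
  %lo       = trunc i64 %v to i32
  %hi.64    = lshr i64 %v, 32
  %hi       = trunc i64 %hi.64 to i32
  %lo.swap  = call i32 @llvm.bswap.i32(i32 %lo)
  %hi.swap  = call i32 @llvm.bswap.i32(i32 %hi)
  %hi.part  = zext i32 %lo.swap to i64  ; swapped low half becomes the high half
  %hi.shift = shl i64 %hi.part, 32
  %lo.part  = zext i32 %hi.swap to i64  ; swapped high half becomes the low half
  %res      = or i64 %hi.shift, %lo.part ; equal to llvm.bswap.i64(%v)
  store i64 %res, ptr addrspace(1) %out
  ret void
}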
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ddc6734335dc..d4af1d73f66b 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -20,7 +20,7 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -99,11 +99,11 @@ define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -201,14 +201,14 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -320,14 +320,14 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
- store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ctlz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -466,14 +466,14 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
- store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %ctlz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -588,13 +588,13 @@ define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %valptr
+ %val = load i8, ptr addrspace(1) %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
- store i8 %ctlz, i8 addrspace(1)* %out
+ store i8 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_ctlz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -691,11 +691,11 @@ define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
- store i64 %ctlz, i64 addrspace(1)* %out
+ store i64 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_ctlz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -784,11 +784,11 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -906,15 +906,15 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
- store i64 %ctlz, i64 addrspace(1)* %out.gep
+ store i64 %ctlz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1032,16 +1032,16 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1138,16 +1138,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1244,17 +1244,17 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; TODO: Should be able to eliminate select here as well.
-define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_eq_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1366,16 +1366,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %ctlz, 32
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1487,16 +1487,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1600,16 +1600,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1726,16 +1726,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %ctlz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i7_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1845,11 +1845,11 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
- %val = load i7, i7 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i7, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i7, ptr addrspace(1) %valptr.gep
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
%cmp = icmp eq i7 %val, 0
%sel = select i1 %cmp, i7 -1, i7 %ctlz
- store i7 %sel, i7 addrspace(1)* %out
+ store i7 %sel, ptr addrspace(1) %out
ret void
}
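The difference between ctlz.ll above and ctlz_zero_undef.ll below is the i1 flag on the intrinsic: with i1 false the result is fully defined (ctlz of 0 is 32 for i32), while with i1 true the result for a zero input is undefined (poison), so the backend is free to use the raw hardware count without a zero guard. A side-by-side sketch with illustrative names:

declare i32 @llvm.ctlz.i32(i32, i1)

define amdgpu_kernel void @ctlz_flag_sketch(ptr addrspace(1) %out, i32 %val) {
  %defined = call i32 @llvm.ctlz.i32(i32 %val, i1 false) ; ctlz(0) = 32, fully defined
  %zu      = call i32 @llvm.ctlz.i32(i32 %val, i1 true)  ; ctlz(0) is poison, no zero guard needed
  %sum     = add i32 %defined, %zu
  store i32 %sum, ptr addrspace(1) %out
  ret void
}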
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 0d02bf8c01a2..354f5b954659 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -16,7 +16,7 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -63,11 +63,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -134,14 +134,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -212,14 +212,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
- store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ctlz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -298,14 +298,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
- store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %ctlz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -388,14 +388,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %in.gep
+ %in.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %in.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
- store i8 %ctlz, i8 addrspace(1)* %out
+ store i8 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -457,11 +457,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
- store i64 %ctlz, i64 addrspace(1)* %out
+ store i64 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -518,11 +518,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -604,15 +604,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
- store i64 %ctlz, i64 addrspace(1)* %out.gep
+ store i64 %ctlz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -694,16 +694,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -773,16 +773,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -852,16 +852,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -941,16 +941,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1042,18 +1042,18 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store volatile i32 %sel, i32 addrspace(1)* %out
- store volatile i1 %cmp, i1 addrspace(1)* undef
+ store volatile i32 %sel, ptr addrspace(1) %out
+ store volatile i1 %cmp, ptr addrspace(1) undef
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1127,17 +1127,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 0, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1211,17 +1211,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 0
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Compare on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1296,17 +1296,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 1
%sel = select i1 %cmp, i32 0, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1381,11 +1381,11 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 1
%sel = select i1 %cmp, i32 %ctlz, i32 0
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
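The change in the tests above is purely syntactic: each typed pointer such as i32 addrspace(1)* becomes the opaque ptr addrspace(1), and the element type is carried only by the load, store, and getelementptr instructions. A minimal sketch of the rewrite, using a hypothetical function name that is not part of this commit:

; typed-pointer form
define void @example_typed(i32 addrspace(1)* %p) {
  %gep = getelementptr i32, i32 addrspace(1)* %p, i32 1
  %v = load i32, i32 addrspace(1)* %gep
  store i32 %v, i32 addrspace(1)* %p
  ret void
}

; opaque-pointer form: the pointer no longer names a pointee type,
; so only the memory instructions spell out i32
define void @example_opaque(ptr addrspace(1) %p) {
  %gep = getelementptr i32, ptr addrspace(1) %p, i32 1
  %v = load i32, ptr addrspace(1) %gep
  store i32 %v, ptr addrspace(1) %p
  ret void
}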
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index fd8c73926650..e871b80cbe29 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -19,7 +19,7 @@ declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -84,11 +84,11 @@ define amdgpu_kernel void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -172,14 +172,14 @@ define amdgpu_kernel void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -274,14 +274,14 @@ define amdgpu_kernel void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
- store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %cttz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -398,14 +398,14 @@ define amdgpu_kernel void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
- store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %cttz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_cttz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -496,13 +496,13 @@ define amdgpu_kernel void @v_cttz_i8(i8 addrspace(1)* noalias %out, i8 addrspace
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %valptr
+ %val = load i8, ptr addrspace(1) %valptr
%cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
- store i8 %cttz, i8 addrspace(1)* %out
+ store i8 %cttz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_cttz_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_cttz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -582,11 +582,11 @@ define amdgpu_kernel void @s_cttz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
- store i64 %cttz, i64 addrspace(1)* %out
+ store i64 %cttz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_cttz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -660,11 +660,11 @@ define amdgpu_kernel void @s_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%trunc = trunc i64 %cttz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_cttz_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_cttz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -765,15 +765,15 @@ define amdgpu_kernel void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
- store i64 %cttz, i64 addrspace(1)* %out.gep
+ store i64 %cttz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_cttz_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_cttz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -874,16 +874,16 @@ define amdgpu_kernel void @v_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%trunc = trunc i64 %cttz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -968,16 +968,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %cttz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1062,17 +1062,17 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %cttz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; TODO: Should be able to eliminate select here as well.
-define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1168,16 +1168,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %cttz, 32
%sel = select i1 %cmp, i32 -1, i32 %cttz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1273,16 +1273,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %cttz, 32
%sel = select i1 %cmp, i32 %cttz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1373,16 +1373,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %cttz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1480,16 +1480,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%cttz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %cttz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i7_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1585,11 +1585,11 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
- %val = load i7, i7 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i7, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i7, ptr addrspace(1) %valptr.gep
%cttz = call i7 @llvm.cttz.i7(i7 %val, i1 false) nounwind readnone
%cmp = icmp eq i7 %val, 0
%sel = select i1 %cmp, i7 -1, i7 %cttz
- store i7 %sel, i7 addrspace(1)* %out
+ store i7 %sel, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index ba3ed974a34a..9e4c88f9da40 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -13,7 +13,7 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -60,11 +60,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX9-GISEL-NEXT: s_endpgm
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -131,14 +131,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -209,14 +209,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
- store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %cttz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -295,14 +295,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
- store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %cttz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noalias %out, i8 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i8_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -369,11 +369,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noa
%cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i8 %val, 0
%ret = select i1 %cttz_ret, i8 %cttz, i8 32
- store i8 %cttz, i8 addrspace(1)* %out, align 4
+ store i8 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) noalias %out, i16 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i16_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -440,11 +440,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(i16 addrspace(1)* n
%cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i16 %val, 0
%ret = select i1 %cttz_ret, i16 %cttz, i16 32
- store i16 %cttz, i16 addrspace(1)* %out, align 4
+ store i16 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i32_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -493,11 +493,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n
%cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i32 %val, 0
%ret = select i1 %cttz_ret, i32 %cttz, i32 32
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i64_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -560,11 +560,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
%cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i64 %val, 0
%ret = select i1 %cttz_ret, i64 %cttz, i64 32
- store i64 %cttz, i64 addrspace(1)* %out, align 4
+ store i64 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i8_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -643,15 +643,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noa
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %val = load i8, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i8 %val, 0
%ret = select i1 %cttz_ret, i8 %cttz, i8 32
- store i8 %ret, i8 addrspace(1)* %out, align 4
+ store i8 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i16_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -744,15 +744,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %val = load i16, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i16 %val, 0
%ret = select i1 %cttz_ret, i16 %cttz, i16 32
- store i16 %ret, i16 addrspace(1)* %out, align 4
+ store i16 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i32_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -865,15 +865,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i32 %val, 0
%ret = select i1 %cttz_ret, i32 %cttz, i32 32
- store i32 %ret, i32 addrspace(1)* %out, align 4
+ store i32 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i64_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1057,15 +1057,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 64, v0, vcc
; GFX9-GISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i64, i64 addrspace(1)* %arrayidx, align 1
+ %val = load i64, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i64 %val, 0
%ret = select i1 %cttz_ret, i64 %cttz, i64 64
- store i64 %ret, i64 addrspace(1)* %out, align 4
+ store i64 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1179,15 +1179,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1301,15 +1301,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1431,15 +1431,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+ define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1518,15 +1518,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %val = load i8, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+ define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1620,11 +1620,11 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %val = load i16, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %ctlz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
index 0fa06b87eba2..919924730724 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
@@ -3,45 +3,45 @@
; GCN-LABEL: and_zext:
; GCN: v_and_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @and_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @and_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = and i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: or_zext:
; GCN: v_or_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @or_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @or_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = or i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: xor_zext:
; GCN: v_xor_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @xor_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @xor_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = xor i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index ed94e9ac7f3c..9773f4d7908a 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -10,7 +10,7 @@ declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
-define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @fshl_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
; SI-LABEL: fshl_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -105,11 +105,11 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @fshl_i32_imm(ptr addrspace(1) %in, i32 %x, i32 %y) {
; SI-LABEL: fshl_i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -176,11 +176,11 @@ define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+define amdgpu_kernel void @fshl_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
; SI-LABEL: fshl_v2i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -307,11 +307,11 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @fshl_v2i32_imm(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y) {
; SI-LABEL: fshl_v2i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -393,11 +393,11 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+define amdgpu_kernel void @fshl_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SI-LABEL: fshl_v4i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -584,11 +584,11 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @fshl_v4i32_imm(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y) {
; SI-LABEL: fshl_v4i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -690,12 +690,12 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
; (a ^ b) | a --> a | b
-define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
+define amdgpu_kernel void @orxor2or1(ptr addrspace(1) %in, i32 %a, i32 %b) {
; SI-LABEL: orxor2or1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -786,6 +786,6 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
%fshl = call i32 @llvm.fshl.i32(i32 %or, i32 %xor, i32 7)
%cond = icmp eq i32 %fshl, 0
%r = select i1 %cond, i32 %a, i32 %b
- store i32 %r, i32 addrspace(1)* %in
+ store i32 %r, ptr addrspace(1) %in
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index ece946d355e2..5d3be8db524b 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -19,7 +19,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare i24 @llvm.fshr.i24(i24, i24, i24)
declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>)
-define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @fshr_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
; SI-LABEL: fshr_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -97,11 +97,11 @@ define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @fshr_i32_imm(ptr addrspace(1) %in, i32 %x, i32 %y) {
; SI-LABEL: fshr_i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -168,11 +168,11 @@ define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+define amdgpu_kernel void @fshr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
; SI-LABEL: fshr_v2i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -269,11 +269,11 @@ define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @fshr_v2i32_imm(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y) {
; SI-LABEL: fshr_v2i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -355,11 +355,11 @@ define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+define amdgpu_kernel void @fshr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SI-LABEL: fshr_v4i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -486,11 +486,11 @@ define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @fshr_v4i32_imm(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y) {
; SI-LABEL: fshr_v4i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -590,7 +590,7 @@ define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
index 6ac24a99fbb1..a955523a2b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
+++ b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
-define amdgpu_kernel void @divergent_or3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_or3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_or3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -16,20 +16,19 @@ define amdgpu_kernel void @divergent_or3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = or i32 %i5, %i4
%i8 = or i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_or3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_or3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_or3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -47,20 +46,19 @@ define amdgpu_kernel void @divergent_or3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = or i64 %i5, %i4
%i8 = or i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @divergent_and3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_and3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_and3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -76,20 +74,19 @@ define amdgpu_kernel void @divergent_and3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = and i32 %i5, %i4
%i8 = and i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_and3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_and3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_and3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -110,20 +107,19 @@ define amdgpu_kernel void @divergent_and3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = and i64 %i5, %i4
%i8 = and i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @divergent_xor3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_xor3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_xor3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -138,20 +134,19 @@ define amdgpu_kernel void @divergent_xor3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = xor i32 %i5, %i4
%i8 = xor i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_xor3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_xor3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -170,20 +165,19 @@ define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = xor i64 %i5, %i4
%i8 = xor i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @uniform_or3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_or3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_or3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -197,19 +191,18 @@ define amdgpu_kernel void @uniform_or3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = or i32 %i5, %i4
%i8 = or i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_or3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_or3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_or3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -225,19 +218,18 @@ define amdgpu_kernel void @uniform_or3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = or i64 %i5, %i4
%i8 = or i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
-define amdgpu_kernel void @uniform_and3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_and3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_and3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -251,19 +243,18 @@ define amdgpu_kernel void @uniform_and3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = and i32 %i5, %i4
%i8 = and i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_and3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_and3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_and3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -279,19 +270,18 @@ define amdgpu_kernel void @uniform_and3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = and i64 %i5, %i4
%i8 = and i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
-define amdgpu_kernel void @uniform_xor3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_xor3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_xor3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -305,19 +295,18 @@ define amdgpu_kernel void @uniform_xor3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = xor i32 %i5, %i4
%i8 = xor i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_xor3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_xor3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_xor3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -333,15 +322,14 @@ define amdgpu_kernel void @uniform_xor3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = xor i64 %i5, %i4
%i8 = xor i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
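The fused-bitlogic.ll hunks above additionally drop the all-zero getelementptr that preceded each scalar store. With opaque pointers, a gep whose indices are all zero yields the same pointer value as its base, so the store can go through the original pointer directly. A small sketch of the pattern, with illustrative names that do not appear in the test:

; typed-pointer form: an element gep reinterprets the vector pointer
%elt = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %vec.ptr, i64 0, i64 0
store i32 %x, i32 addrspace(1)* %elt, align 16

; opaque-pointer form: the zero-index gep is the identity, so it is omitted
store i32 %x, ptr addrspace(1) %vec.ptr, align 16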
diff --git a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
index 61a4f8fb32cd..2821f8e696f0 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
@@ -37,20 +37,20 @@ define i64 @lshl_add_u64_vvv(i64 %v, i64 %s, i64 %a) {
define amdgpu_kernel void @lshl_add_u64_s2v(i64 %v) {
; GCN-LABEL: lshl_add_u64_s2v:
; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 2, v[{{[0-9:]+}}]
- %a = load i64, i64* undef
+ %a = load i64, ptr undef
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
define amdgpu_kernel void @lshl_add_u64_v2s(i64 %a) {
; GCN-LABEL: lshl_add_u64_v2s:
; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 2, s[{{[0-9:]+}}]
- %v = load i64, i64* undef
+ %v = load i64, ptr undef
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -61,7 +61,7 @@ define amdgpu_kernel void @lshl_add_u64_s2s(i64 %v, i64 %a) {
; GCN: s_addc_u32
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -75,18 +75,18 @@ define i64 @add_u64_vv(i64 %v, i64 %a) {
define amdgpu_kernel void @add_u64_sv(i64 %v) {
; GCN-LABEL: add_u64_sv:
; GCN: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
- %a = load i64, i64* undef
+ %a = load i64, ptr undef
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
define amdgpu_kernel void @add_u64_vs(i64 %a) {
; GCN-LABEL: add_u64_vs:
; GCN: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
- %v = load i64, i64* undef
+ %v = load i64, ptr undef
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -95,14 +95,14 @@ define amdgpu_kernel void @add_u64_ss(i64 %v, i64 %a) {
; GCN: s_add_u32
; GCN: s_addc_u32 s1, s1, s3
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
-define i32 @lshl_add_u64_gep(i32 *%p, i64 %a) {
+define i32 @lshl_add_u64_gep(ptr %p, i64 %a) {
; GCN-LABEL: lshl_add_u64_gep:
; GCN: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
- %gep = getelementptr inbounds i32, i32* %p, i64 %a
- %v = load i32, i32* %gep
+ %gep = getelementptr inbounds i32, ptr %p, i64 %a
+ %v = load i32, ptr %gep
ret i32 %v
}
diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
index 2d2d6161869d..7830bfc6ac7f 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @zext_shl64_to_32(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_to_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -17,11 +17,11 @@ define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i3
%and = and i32 %x, 1073741823
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @sext_shl64_to_32(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_to_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -38,11 +38,11 @@ define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i3
%and = and i32 %x, 536870911
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @zext_shl64_overflow(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_overflow:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -60,11 +60,11 @@ define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out,
%and = and i32 %x, 2147483647
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @sext_shl64_overflow(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_overflow:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -82,11 +82,11 @@ define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out,
%and = and i32 %x, 2147483647
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: mulu24_shl64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -104,12 +104,12 @@ bb:
%tmp1 = and i32 %tmp, 6
%mulconv = mul nuw nsw i32 %tmp1, 7
%tmp2 = zext i32 %mulconv to i64
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2
- store i32 0, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp2
+ store i32 0, ptr addrspace(1) %tmp3, align 4
ret void
}
-define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) {
+define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) {
; GCN-LABEL: muli24_shl64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -132,14 +132,14 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = sext i32 %tmp to i64
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2
- %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp2
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
%tmp5 = or i32 %tmp4, -8388608
%tmp6 = mul nsw i32 %tmp5, -7
%tmp7 = zext i32 %tmp6 to i64
%tmp8 = shl nuw nsw i64 %tmp7, 3
- %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2
- store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8
+ %tmp9 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %tmp2
+ store i64 %tmp8, ptr addrspace(1) %tmp9, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
index dede51844b1c..f885325d38f7 100644
--- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_lshr_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -71,11 +71,11 @@ define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = lshr <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -152,17 +152,17 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = lshr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: lshr_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -246,15 +246,15 @@ define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: lshr_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -338,15 +338,15 @@ define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -422,15 +422,15 @@ define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> <i16 8, i16 8>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -502,15 +502,15 @@ define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_lshr_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -600,17 +600,17 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -690,11 +690,11 @@ define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = lshr <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/nor.ll b/llvm/test/CodeGen/AMDGPU/nor.ll
index 6781ab6745f0..df8875b5563b 100644
--- a/llvm/test/CodeGen/AMDGPU/nor.ll
+++ b/llvm/test/CodeGen/AMDGPU/nor.ll
@@ -6,11 +6,11 @@
; GCN-LABEL: {{^}}scalar_nor_i32_one_use
; GCN: s_nor_b32
define amdgpu_kernel void @scalar_nor_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%or = or i32 %a, %b
%r0.val = xor i32 %or, -1
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -20,24 +20,24 @@ entry:
; GCN: s_not_b32
; GCN: s_add_i32
define amdgpu_kernel void @scalar_nor_i32_mul_use(
- i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, ptr addrspace(1) %r1, i32 %a, i32 %b) {
entry:
%or = or i32 %a, %b
%r0.val = xor i32 %or, -1
%r1.val = add i32 %or, %a
- store i32 %r0.val, i32 addrspace(1)* %r0
- store i32 %r1.val, i32 addrspace(1)* %r1
+ store i32 %r0.val, ptr addrspace(1) %r0
+ store i32 %r1.val, ptr addrspace(1) %r1
ret void
}
; GCN-LABEL: {{^}}scalar_nor_i64_one_use
; GCN: s_nor_b64
define amdgpu_kernel void @scalar_nor_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%or = or i64 %a, %b
%r0.val = xor i64 %or, -1
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -48,13 +48,13 @@ entry:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @scalar_nor_i64_mul_use(
- i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, ptr addrspace(1) %r1, i64 %a, i64 %b) {
entry:
%or = or i64 %a, %b
%r0.val = xor i64 %or, -1
%r1.val = add i64 %or, %a
- store i64 %r0.val, i64 addrspace(1)* %r0
- store i64 %r1.val, i64 addrspace(1)* %r1
+ store i64 %r0.val, ptr addrspace(1) %r0
+ store i64 %r1.val, ptr addrspace(1) %r1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index c2e615e97132..8f82a53cafe1 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -9,12 +9,12 @@
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @or_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = or <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -28,37 +28,37 @@ define amdgpu_kernel void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addr
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @or_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = or <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
-define amdgpu_kernel void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @scalar_or_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
- store i32 %or, i32 addrspace(1)* %out
+ store i32 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
- %loada = load i32, i32 addrspace(1)* %a
+define amdgpu_kernel void @vector_or_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, i32 %b) {
+ %loada = load i32, ptr addrspace(1) %a
%or = or i32 %loada, %b
- store i32 %or, i32 addrspace(1)* %out
+ store i32 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
-define amdgpu_kernel void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @scalar_or_literal_i32(ptr addrspace(1) %out, i32 %a) {
%or = or i32 %a, 99999
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
@@ -68,9 +68,9 @@ define amdgpu_kernel void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a)
; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
-define amdgpu_kernel void @scalar_or_literal_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_literal_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, 4261135838621753
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -82,12 +82,12 @@ define amdgpu_kernel void @scalar_or_literal_i64(i64 addrspace(1)* %out, [8 x i3
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3039
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xf237b
-define amdgpu_kernel void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @scalar_or_literal_multi_use_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
%or = or i64 %a, 4261135838621753
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
%foo = add i64 %b, 4261135838621753
- store volatile i64 %foo, i64 addrspace(1)* undef
+ store volatile i64 %foo, ptr addrspace(1) undef
ret void
}
@@ -101,9 +101,9 @@ define amdgpu_kernel void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %ou
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
; SI-NOT: or_b32
; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, 63
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -111,11 +111,11 @@ define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x
; SI-NOT: or_b32
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 63
; SI-NOT: or_b32
-define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%or = or i64 %a, 63
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
%foo = add i64 %b, 63
- store volatile i64 %foo, i64 addrspace(1)* undef
+ store volatile i64 %foo, ptr addrspace(1) undef
ret void
}
@@ -125,27 +125,27 @@ define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)*
; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
; SI: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
-define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, -8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
-define amdgpu_kernel void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
+define amdgpu_kernel void @vector_or_literal_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i32, ptr addrspace(1) %a, align 4
%or = or i32 %loada, 65535
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
-define amdgpu_kernel void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
+define amdgpu_kernel void @vector_or_inline_immediate_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i32, ptr addrspace(1) %a, align 4
%or = or i32 %loada, 4
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
@@ -154,30 +154,30 @@ define amdgpu_kernel void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI: s_or_b64
-define amdgpu_kernel void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
- %loadb = load i64, i64 addrspace(1)* %b, align 8
+define amdgpu_kernel void @vector_or_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
+ %loadb = load i64, ptr addrspace(1) %b, align 8
%or = or i64 %loada, %loadb
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
- %loada = load i64, i64 addrspace(1)* %a
+define amdgpu_kernel void @scalar_vector_or_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, i64 %b) {
+ %loada = load i64, ptr addrspace(1) %a
%or = or i64 %loada, %b
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -186,10 +186,10 @@ define amdgpu_kernel void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addr
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_loadimm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, 22470723082367
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -200,10 +200,10 @@ define amdgpu_kernel void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 add
; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
; SI: buffer_store_dwordx2 v[[[LO_RESULT]]:[[HI_VREG]]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, 8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -213,10 +213,10 @@ define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspa
; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
; SI: buffer_store_dwordx2 v[[[RES_LO]]:[[RES_HI]]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_neg_inline_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, -8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -226,10 +226,10 @@ define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out,
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffff38, v[[LO_VREG]]
; SI: buffer_store_dwordx2
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_neg_literal(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, -200
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -239,10 +239,10 @@ define amdgpu_kernel void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
-define amdgpu_kernel void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @trunc_i64_or_to_i32(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 8
+ store i32 %trunc, ptr addrspace(1) %out, align 8
ret void
}
@@ -250,14 +250,14 @@ define amdgpu_kernel void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, [8 x i32]
; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], vcc
-define amdgpu_kernel void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float, float addrspace(1)* %in0
- %b = load float, float addrspace(1)* %in1
+define amdgpu_kernel void @or_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
+ %a = load float, ptr addrspace(1) %in0
+ %b = load float, ptr addrspace(1) %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
%result = zext i1 %or to i32
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
@@ -267,10 +267,10 @@ define amdgpu_kernel void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in
; SI: s_cmp_eq_u32
; SI: s_cselect_b64 [[C2:[^,]+]], -1, 0
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], [[C1]], [[C2]]
-define amdgpu_kernel void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @s_or_i1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) {
%cmp0 = icmp eq i32 %a, %b
%cmp1 = icmp eq i32 %c, %d
%or = or i1 %cmp0, %cmp1
- store i1 %or, i1 addrspace(1)* %out
+ store i1 %or, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/permute.ll b/llvm/test/CodeGen/AMDGPU/permute.ll
index b0b684e770ff..c6671fa51795 100644
--- a/llvm/test/CodeGen/AMDGPU/permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh8_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -19,16 +19,16 @@ define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %ar
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 8
%tmp3 = and i32 %arg1, 255
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsr24_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsr24_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -46,16 +46,16 @@ define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %a
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = lshr i32 %tmp, 24
%tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -74,17 +74,17 @@ define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %a
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
%tmp5 = xor i32 %tmp4, -2147483648
- store i32 %tmp5, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp5, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -102,16 +102,16 @@ define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, -16711936
%tmp3 = and i32 %arg1, 16711935
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh8_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -128,16 +128,16 @@ define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 8
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh16_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh16_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -155,16 +155,16 @@ define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_xor_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_xor_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -182,17 +182,17 @@ define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %ar
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, -16776961
%tmp3 = and i32 %arg1, 16776960
%tmp4 = xor i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
-define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -212,17 +212,17 @@ define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %tmp, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, -65536
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_and_shl(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and_shl:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -240,17 +240,17 @@ define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%tmp3 = and i32 %arg1, 65535
%tmp4 = or i32 %tmp2, %tmp3
%and = and i32 %tmp4, 4278190335
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @or_and_or(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: or_and_or:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -268,17 +268,17 @@ define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%or1 = or i32 %tmp, 16776960 ; 0x00ffff00
%or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
%and = and i32 %or1, %or2
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
ret void
}
; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
-define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_ffff0500(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff0500:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -304,21 +304,21 @@ define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %load = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %load = load i32, ptr addrspace(1) %gep, align 4
%mask1 = or i32 %arg1, 32768 ; 0x8000
%mask2 = or i32 %load, 4
%and = and i32 %mask2, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
%v = and i32 %tmp3, 4294934532 ; 0xffff8004
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
-define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_050c0c00(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_050c0c00:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -341,20 +341,20 @@ define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%mask = or i32 %arg1, 4
%tmp3 = and i32 %mask, 65535
%tmp4 = or i32 %tmp2, %tmp3
%and = and i32 %tmp4, 4278190335
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
%v = and i32 %and, 16776964
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
-define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_ffff8004(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff8004:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -378,17 +378,17 @@ define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %load = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %load = load i32, ptr addrspace(1) %gep, align 4
%mask1 = or i32 %arg1, 4
%mask2 = or i32 %load, 32768 ; 0x8000
%and = and i32 %mask1, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
%v = and i32 %tmp3, 4294934532 ; 0xffff8004
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 7104f1dac006..885dfdedfcea 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Extract the high bit of the 1st quarter
-define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_31_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -24,17 +24,17 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 31
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 2nd quarter
-define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_63_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -55,17 +55,17 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 63
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 3rd quarter
-define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_95_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_95_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -88,17 +88,17 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 95
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 4th quarter
-define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_127_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_127_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -119,17 +119,17 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 127
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Spans more than 2 dword boundaries
-define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_34_100_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -150,12 +150,12 @@ define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i
; GCN-NEXT: buffer_store_dwordx4 v[4:7], v[8:9], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 34
%bit = and i128 %srl, 73786976294838206463
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index 378212dc326f..42bfbbe19dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -9,14 +9,14 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -27,14 +27,14 @@ define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 63
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -43,14 +43,14 @@ define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -59,14 +59,14 @@ define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addr
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -76,14 +76,14 @@ define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 32
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -93,14 +93,14 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -109,14 +109,14 @@ define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_21_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -125,14 +125,14 @@ define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_30_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -141,14 +141,14 @@ define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 a
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 2147483647
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -160,14 +160,14 @@ define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 a
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -177,14 +177,14 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -195,14 +195,14 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_30_60_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 30
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -212,14 +212,14 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -228,14 +228,14 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64
; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%and = and i64 %srl, 4294967295
- store i64 %and, i64 addrspace(1)* %out
+ store i64 %and, ptr addrspace(1) %out
ret void
}
@@ -244,15 +244,15 @@ define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
-define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -260,15 +260,15 @@ define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %ou
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 3
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -276,15 +276,15 @@ define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -294,15 +294,15 @@ define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %ou
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 3
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -315,14 +315,14 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)*
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
-define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @and_not_mask_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 4
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -336,15 +336,15 @@ define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspac
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 27
%bit = and i64 %srl, 3
- store volatile i64 %srl, i64 addrspace(1)* %out
- store volatile i64 %bit, i64 addrspace(1)* %out
+ store volatile i64 %srl, ptr addrspace(1) %out
+ store volatile i64 %bit, ptr addrspace(1) %out
ret void
}
@@ -356,15 +356,15 @@ define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspac
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: buffer_store_dwordx2 v[[[SHR]]:[[ZERO_SHR]]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO_BFE]]]
-define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 34
%bit = and i64 %srl, 7
- store volatile i64 %srl, i64 addrspace(1)* %out
- store volatile i64 %bit, i64 addrspace(1)* %out
+ store volatile i64 %srl, ptr addrspace(1) %out
+ store volatile i64 %bit, ptr addrspace(1) %out
ret void
}
@@ -374,19 +374,19 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspac
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
-define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
- %out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out0.gep = getelementptr i64, ptr addrspace(1) %out0, i32 %id.x
+ %out1.gep = getelementptr i32, ptr addrspace(1) %out1, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 7
- store volatile i64 %bit, i64 addrspace(1)* %out0.gep
+ store volatile i64 %bit, ptr addrspace(1) %out0.gep
%srl.srl32 = lshr i64 %srl, 32
%srl.hi = trunc i64 %srl.srl32 to i32
- store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
+ store volatile i32 %srl.hi, ptr addrspace(1) %out1.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index 403e553f8bec..da1faae414ce 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -209,7 +209,7 @@ define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = shl i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -242,7 +242,7 @@ define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = lshr i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -276,7 +276,7 @@ define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = ashr i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -497,7 +497,7 @@ define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = shl <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
@@ -569,7 +569,7 @@ define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = lshr <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
@@ -643,7 +643,7 @@ define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = ashr <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
index 1c42fe4711ed..8b0ffcf83f71 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -8,10 +8,10 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 35
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -20,10 +20,10 @@ define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -32,10 +32,10 @@ define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_33(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 33
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -43,10 +43,10 @@ define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -58,11 +58,11 @@ define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[LO]], 8, 23
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_and_i64_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%and = and i64 %val, 9223372036854775807 ; 0x7fffffffffffffff
%shl = lshr i64 %and, 40
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -73,10 +73,10 @@ define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 35
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -84,10 +84,10 @@ define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspac
; GCN-DAG: buffer_load_dword v[[HI:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -96,28 +96,28 @@ define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspac
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
; ashr (i64 x), 63 => (ashr lo(x), 31), lo(x)
; GCN-LABEL: {{^}}ashr_i64_const_32:
-define amdgpu_kernel void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @ashr_i64_const_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = ashr i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}ashr_i64_const_63:
-define amdgpu_kernel void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @ashr_i64_const_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = ashr i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -125,11 +125,11 @@ define amdgpu_kernel void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspa
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_31_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -137,11 +137,11 @@ define amdgpu_kernel void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define amdgpu_kernel void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_15_i16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 15
%trunc = trunc i64 %shl to i16
- store i16 %trunc, i16 addrspace(1)* %out
+ store i16 %trunc, ptr addrspace(1) %out
ret void
}
@@ -149,11 +149,11 @@ define amdgpu_kernel void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define amdgpu_kernel void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32, i32 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_15_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i32, ptr addrspace(1) %in
%shl = shl i32 %val, 15
%trunc = trunc i32 %shl to i16
- store i16 %trunc, i16 addrspace(1)* %out
+ store i16 %trunc, ptr addrspace(1) %out
ret void
}
@@ -161,11 +161,11 @@ define amdgpu_kernel void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 7, [[VAL]]
; GCN: buffer_store_byte [[SHL]]
-define amdgpu_kernel void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_7_i8_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 7
%trunc = trunc i64 %shl to i8
- store i8 %trunc, i8 addrspace(1)* %out
+ store i8 %trunc, ptr addrspace(1) %out
ret void
}
@@ -174,11 +174,11 @@ define amdgpu_kernel void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspa
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 2, [[SHL]]
; GCN: buffer_store_byte [[AND]]
-define amdgpu_kernel void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_1_i2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i2
- store i2 %trunc, i2 addrspace(1)* %out
+ store i2 %trunc, ptr addrspace(1) %out
ret void
}
@@ -186,11 +186,11 @@ define amdgpu_kernel void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspa
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_1_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -198,22 +198,22 @@ define amdgpu_kernel void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrs
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_16_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_16_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 16
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}trunc_shl_33_i32_i64:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[ZERO]]
-define amdgpu_kernel void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_33_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 33
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -222,11 +222,11 @@ define amdgpu_kernel void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addr
; GCN-DAG: v_lshlrev_b32_e32 v[[RESHI:[0-9]+]], 16, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 v[[RESLO:[0-9]+]], 16, v[[LO]]
; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
-define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load <2 x i64>, ptr addrspace(1) %in
%shl = shl <2 x i64> %val, <i64 16, i64 16>
%trunc = trunc <2 x i64> %shl to <2 x i32>
- store <2 x i32> %trunc, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %trunc, ptr addrspace(1) %out
ret void
}
@@ -235,12 +235,12 @@ define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out
; GCN: v_lshl_b64 v[[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]], [[VAL]], 31
; GCN: buffer_store_dword v[[RESLO]]
; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
-define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
- store volatile i32 %trunc, i32 addrspace(1)* %out
- store volatile i64 %shl, i64 addrspace(1)* %in
+ store volatile i32 %trunc, ptr addrspace(1) %out
+ store volatile i64 %shl, ptr addrspace(1) %in
ret void
}
@@ -248,14 +248,14 @@ define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: v_lshl_b64
; GCN-NOT: v_lshlrev_b64
-define amdgpu_kernel void @trunc_shl_and31(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_and31(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -264,41 +264,41 @@ bb:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
; GCN-NOT: v_lshl_b64
; GCN-NOT: v_lshlrev_b64
-define amdgpu_kernel void @trunc_shl_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 30
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
; GCN-LABEL: {{^}}trunc_shl_wrong_and63:
; Negative test, wrong constant
; GCN: v_lshl_b64
-define amdgpu_kernel void @trunc_shl_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 63
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
; GCN-LABEL: {{^}}trunc_shl_no_and:
; Negative test, shift can be full 64 bit
; GCN: v_lshl_b64
-define amdgpu_kernel void @trunc_shl_no_and(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_no_and(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp4 = zext i32 %arg2 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -307,10 +307,10 @@ bb:
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 5
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 6
-define amdgpu_kernel void @trunc_shl_vec_vec(<4 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @trunc_shl_vec_vec(ptr addrspace(1) %arg) {
bb:
- %v = load <4 x i64>, <4 x i64> addrspace(1)* %arg, align 32
+ %v = load <4 x i64>, ptr addrspace(1) %arg, align 32
%shl = shl <4 x i64> %v, <i64 3, i64 4, i64 5, i64 6>
- store <4 x i64> %shl, <4 x i64> addrspace(1)* %arg, align 32
+ store <4 x i64> %shl, ptr addrspace(1) %arg, align 32
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
index 845b88b86e3d..351593a0826f 100644
--- a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
@@ -9,14 +9,14 @@
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
-define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @add_const_offset(ptr addrspace(1) nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%add = add i32 %id, 200
%shl = shl i32 %add, 2
- %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %shl
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
@@ -26,14 +26,14 @@ bb:
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
-define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @or_const_offset(ptr addrspace(1) nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%add = or i32 %id, 256
%shl = shl i32 %add, 2
- %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %shl
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 8e59750efa78..7e6da2c321f7 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.x() #0
-define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,15 +59,15 @@ define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add
; EG-NEXT: LSHL T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = shl <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -129,15 +129,15 @@ define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add
; EG-NEXT: LSHL T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = shl <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -203,15 +203,15 @@ define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %b_ptr
%result = shl i16 %a, %b
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_s(ptr addrspace(1) %out, ptr addrspace(1) %in, i16 %b) {
; SI-LABEL: shl_i16_v_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -278,13 +278,13 @@ define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)*
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i16, i16 addrspace(1)* %in
+ %a = load i16, ptr addrspace(1) %in
%result = shl i16 %a, %b
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_compute_s(ptr addrspace(1) %out, ptr addrspace(1) %in, i16 %b) {
; SI-LABEL: shl_i16_v_compute_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -360,14 +360,14 @@ define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrs
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i16, i16 addrspace(1)* %in
+ %a = load i16, ptr addrspace(1) %in
%b.add = add i16 %b, 3
%result = shl i16 %a, %b.add
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16_computed_amount(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i16_computed_amount:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -458,18 +458,18 @@ define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 a
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
- %a = load volatile i16, i16 addrspace(1)* %in
- %b = load volatile i16, i16 addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr i16, ptr addrspace(1) %gep, i16 1
+ %a = load volatile i16, ptr addrspace(1) %in
+ %b = load volatile i16, ptr addrspace(1) %b_ptr
%b.add = add i16 %b, 3
%result = shl i16 %a, %b.add
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
+define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; SI-LABEL: shl_i16_i_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -523,11 +523,11 @@ define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = shl i16 %a, 12
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -617,17 +617,17 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <2 x i16>, ptr addrspace(1) %in
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = shl <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -747,17 +747,17 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <4 x i16>, ptr addrspace(1) %gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = shl <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %gep.out
+ store <4 x i16> %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -816,15 +816,15 @@ define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = shl i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -899,15 +899,15 @@ define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add
; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <2 x i64>, ptr addrspace(1) %in
+ %b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = shl <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1023,16 +1023,16 @@ define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = shl <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
; Make sure load width gets reduced to i32 load.
-define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @s_shl_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: s_shl_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0x13
@@ -1069,11 +1069,11 @@ define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = shl i64 %a, 32
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_shl_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_shl_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1132,15 +1132,15 @@ define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)
; EG-NEXT: MOV * T1.Y, T0.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = shl i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_shl_constant_i64(ptr addrspace(1) %out, i64 %a) {
; SI-LABEL: s_shl_constant_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1194,11 +1194,11 @@ define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 281474976710655, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_constant_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1263,13 +1263,13 @@ define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrsp
; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 1231231234567, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_i64_32_bit_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_i64_32_bit_constant:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1331,13 +1331,13 @@ define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 1234567, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_inline_imm_64_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1394,13 +1394,13 @@ define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 64, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_64_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1446,11 +1446,11 @@ define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 64, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_1_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1497,11 +1497,11 @@ define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 ad
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 1, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_1_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1544,11 +1544,11 @@ define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4607182418800017408, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_1_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1591,11 +1591,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13830554455654793216, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_0_5_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1638,11 +1638,11 @@ define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4602678819172646912, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_0_5_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1685,11 +1685,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13826050856027422720, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_2_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1732,11 +1732,11 @@ define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4611686018427387904, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_2_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1779,11 +1779,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13835058055282163712, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1826,11 +1826,11 @@ define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4616189618054758400, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1873,14 +1873,14 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13839561654909534208, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
; Test with the 64-bit integer bitpattern for a 32-bit float in the
; low 32-bits, which is not a valid 64-bit inline immediate.
-define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_f32_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1929,12 +1929,12 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 1082130432, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
; FIXME: Copy of -1 register
-define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1986,11 +1986,11 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %o
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 -1065353216, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_high_imm_f32_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -2037,11 +2037,11 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4647714815446351872, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -2088,7 +2088,7 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13871086852301127680, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
@@ -2128,11 +2128,11 @@ define amdgpu_kernel void @test_mul2(i32 %p) {
; EG-NEXT: LSHL * T1.X, KC0[2].Y, 1,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
%i = mul i32 %p, 2
- store volatile i32 %i, i32 addrspace(1)* undef
+ store volatile i32 %i, ptr addrspace(1) undef
ret void
}
-define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) {
+define void @shl_or_k(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: shl_or_k:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2169,11 +2169,11 @@ define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) {
; EG-NEXT: 4(5.605194e-45), 2(2.802597e-45)
%tmp0 = or i32 %in, 1
%tmp2 = shl i32 %tmp0, 2
- store i32 %tmp2, i32 addrspace(1)* %out
+ store i32 %tmp2, ptr addrspace(1) %out
ret void
}
-define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) {
+define void @shl_or_k_two_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %in) {
; SI-LABEL: shl_or_k_two_uses:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2213,8 +2213,8 @@ define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tmp0 = or i32 %in, 1
%tmp2 = shl i32 %tmp0, 2
- store i32 %tmp2, i32 addrspace(1)* %out0
- store i32 %tmp0, i32 addrspace(1)* %out1
+ store i32 %tmp2, ptr addrspace(1) %out0
+ store i32 %tmp0, ptr addrspace(1) %out1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index 8ea1879919b3..ff66b7d99686 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_shl_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -82,11 +82,11 @@ define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = shl <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -163,17 +163,17 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = shl <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: shl_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -257,15 +257,15 @@ define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: shl_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -349,15 +349,15 @@ define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -434,15 +434,15 @@ define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> <i16 8, i16 8>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -515,15 +515,15 @@ define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_shl_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -613,17 +613,17 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = shl <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -709,11 +709,11 @@ define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = shl <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
index 80755a01f425..3cf3a3925bb1 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -9,13 +9,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -25,14 +25,14 @@ define amdgpu_kernel void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out0, align 4
- store i32 %add, i32 addrspace(1)* %out1, align 4
+ store i32 %result, ptr addrspace(1) %out0, align 4
+ store i32 %add, ptr addrspace(1) %out1, align 4
ret void
}
@@ -43,13 +43,13 @@ define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_999_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%shl = add i32 %val, 999
%result = shl i32 %shl, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -60,11 +60,11 @@ define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspa
; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant(i32 addrspace(1)* %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
- store i32 %add.1, i32 addrspace(1)* %out, align 4
+ store i32 %add.1, ptr addrspace(1) %out, align 4
ret void
}
@@ -76,11 +76,11 @@ define amdgpu_kernel void @test_add_shl_add_constant(i32 addrspace(1)* %out, [8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant_inv(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %y, %shl
- store i32 %add.1, i32 addrspace(1)* %out, align 4
+ store i32 %add.1, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 30cecc085776..3c9a05af3c59 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -19,13 +19,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
+ store float %val0, ptr addrspace(1) %out
ret void
}
@@ -39,14 +39,14 @@ define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 add
; GCN-DAG: buffer_store_dword [[RESULT]]
; GCN-DAG: buffer_store_dword [[ADDUSE]]
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_1(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%shl_add_use = shl i32 %idx.0, 2
- store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
+ store i32 %shl_add_use, ptr addrspace(1) %add_use, align 4
+ store float %val0, ptr addrspace(1) %out
ret void
}
@@ -55,13 +55,13 @@ define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 add
; GCN-LABEL: {{^}}load_shl_base_lds_max_offset
; GCN: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
- %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
- %val0 = load i8, i8 addrspace(3)* %arrayidx0
- store i32 %idx.0, i32 addrspace(1)* %add_use
- store i8 %val0, i8 addrspace(1)* %out
+ %arrayidx0 = getelementptr inbounds [65536 x i8], ptr addrspace(3) @maxlds, i32 0, i32 %idx.0
+ %val0 = load i8, ptr addrspace(3) %arrayidx0
+ store i32 %idx.0, ptr addrspace(1) %add_use
+ store i8 %val0, ptr addrspace(1) %out
ret void
}
@@ -73,15 +73,15 @@ define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i
; GCN: s_mov_b32 m0, -1
; GCN: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @load_shl_base_lds_2(ptr addrspace(1) %out) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
- %val1 = load float, float addrspace(3)* %arrayidx1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
+ %val1 = load float, ptr addrspace(3) %arrayidx1, align 4
%sum = fadd float %val0, %val1
- store float %sum, float addrspace(1)* %out, align 4
+ store float %sum, ptr addrspace(1) %out, align 4
ret void
}
@@ -89,12 +89,12 @@ define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- store float 1.0, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ store float 1.0, ptr addrspace(3) %arrayidx0, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -104,13 +104,13 @@ define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 ad
@lds2 = addrspace(3) global [512 x i32] undef, align 4
-; define amdgpu_kernel void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+; %val = load atomic i32, ptr addrspace(3) %arrayidx0 seq_cst, align 4
+; store i32 %val, ptr addrspace(1) %out, align 4
+; store i32 %idx.0, ptr addrspace(1) %add_use, align 4
; ret void
; }
@@ -119,14 +119,14 @@ define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 ad
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
+define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %pair = cmpxchg ptr addrspace(3) %arrayidx0, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
- store i32 %result, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -134,13 +134,13 @@ define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out,
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_swap_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xchg ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -148,13 +148,13 @@ define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i3
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_add_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw add ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -162,13 +162,13 @@ define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_sub_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw sub ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -176,13 +176,13 @@ define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_and_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw and ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -190,13 +190,13 @@ define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_or_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw or ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -204,23 +204,23 @@ define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xor ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
-; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+; %val = atomicrmw nand ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+; store i32 %val, ptr addrspace(1) %out, align 4
+; store i32 %idx.0, ptr addrspace(1) %add_use, align 4
; ret void
; }
@@ -228,13 +228,13 @@ define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_min_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw min ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -242,13 +242,13 @@ define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_max_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw max ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -256,13 +256,13 @@ define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umin_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umin ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -270,13 +270,13 @@ define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i3
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umax ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -289,10 +289,10 @@ define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -306,10 +306,10 @@ define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 8191
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -323,10 +323,10 @@ define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 4096
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -340,10 +340,10 @@ define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 2
%shl1 = shl i32 %idx.add, 3
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
@@ -358,10 +358,10 @@ define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #
%idx.add = add nuw i32 %idx, 511
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
@@ -375,10 +375,10 @@ define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.a
%idx.add = add nuw i32 %idx, 256
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
@@ -393,10 +393,10 @@ define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
%idx.add = or i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -410,10 +410,10 @@ define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
%idx.add = or i32 %idx, 8191
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
index 66b7b342b31d..c04cb89e9527 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
@@ -7,16 +7,16 @@
; GCN: v_add_co_ci_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc_lo, 0, v5, vcc_lo
; GCN: global_atomic_csub v{{[0-9]+}}, v[[[LO]]:[[HI]]], [[K]], off offset:512 glc
; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to i32 addrspace(1)*
- %val = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %castback, i32 43)
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ %val = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %castback, i32 43)
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret i32 %val
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
attributes #0 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
index d4709f2f7c18..30e799229359 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
@@ -7,13 +7,13 @@
; GCN-DAG: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN-DAG: global_atomic_and v[[[LO]]:[[HI]]], [[THREE]], off offset:512
; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define void @shl_base_atomicrmw_global_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to i32 addrspace(1)*
- %val = atomicrmw and i32 addrspace(1)* %castback, i32 3 seq_cst
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ %val = atomicrmw and ptr addrspace(1) %castback, i32 3 seq_cst
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret void
}
@@ -24,17 +24,17 @@ define void @shl_base_atomicrmw_global_ptr(i32 addrspace(1)* %out, i64 addrspace
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x42c80000
; GCN-DAG: global_atomic_add_f32 v[[[LO]]:[[HI]]], [[K]], off offset:512
; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define void @shl_base_global_ptr_global_atomic_fadd(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to float addrspace(1)*
- call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %castback, float 100.0)
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %castback, float 100.0)
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret void
}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #1
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 2d43c58aa38a..44881e8345e5 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
-define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -61,15 +61,15 @@ define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
; EG-NEXT: ASHR T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -134,17 +134,17 @@ define amdgpu_kernel void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
; EG-NEXT: ASHR T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FIXME: The ashr operation is uniform, but because its operands come from a
; global load we end up with the vector instructions rather than scalar.
-define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -230,17 +230,17 @@ define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ad
; EG-NEXT: OR_INT T6.X, PS, PV.W,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in, i16 1
+ %a = load <2 x i16>, ptr addrspace(1) %in
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
; FIXME: The ashr operation is uniform, but because its operands come from a
; global load we end up with the vector instructions rather than scalar.
-define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -393,15 +393,15 @@ define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ad
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
+ %a = load <4 x i16>, ptr addrspace(1) %in
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out
+ store <4 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_ashr_i64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_ashr_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -445,11 +445,11 @@ define amdgpu_kernel void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%in.ext = sext i32 %in to i64
%ashr = ashr i64 %in.ext, 8
- store i64 %ashr, i64 addrspace(1)* %out
+ store i64 %ashr, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_i64_2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_i64_2:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -511,15 +511,15 @@ define amdgpu_kernel void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)*
; EG-NEXT: 31(4.344025e-44), 2(2.802597e-45)
; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, PV.W,
entry:
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = ashr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -594,16 +594,16 @@ define amdgpu_kernel void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> ad
; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT: CNDE_INT * T0.Y, T2.W, T2.Y, T1.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <2 x i64>, ptr addrspace(1) %in
+ %b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %result, ptr addrspace(1) %out
ret void
}
; FIXME: Broken on r600
-define amdgpu_kernel void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -712,15 +712,15 @@ define amdgpu_kernel void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> ad
; EG-NEXT: LSHR T3.X, KC0[2].Y, literal.x,
; EG-NEXT: CNDE_INT * T2.Y, T4.W, T5.Y, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @s_ashr_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[0:1], 0x14
@@ -770,11 +770,11 @@ define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = ashr i64 %a, 32
%add = add i64 %result, %b
- store i64 %add, i64 addrspace(1)* %out
+ store i64 %add, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_ashr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -831,15 +831,15 @@ define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: ASHR * T0.Y, T0.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = ashr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @s_ashr_63_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_63_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[0:1], 0x14
@@ -889,11 +889,11 @@ define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = ashr i64 %a, 63
%add = add i64 %result, %b
- store i64 %add, i64 addrspace(1)* %out
+ store i64 %add, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_ashr_63_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -954,11 +954,11 @@ define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: MOV * T0.Y, PV.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = ashr i64 %a, 63
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index 9693c44fac30..a759896ac424 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
-define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -53,15 +53,15 @@ define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i
; EG-NEXT: LSHR T0.X, T0.X, T0.Y,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+ %a = load i32, ptr addrspace(1) %in
+ %b = load i32, ptr addrspace(1) %b_ptr
%result = lshr i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -113,15 +113,15 @@ define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
; EG-NEXT: LSHR T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = lshr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -183,15 +183,15 @@ define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
; EG-NEXT: LSHR T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -247,15 +247,15 @@ define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %i
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, 0.0,
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = lshr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -358,16 +358,16 @@ define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> ad
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT * T1.Y, T4.W, T4.Y, 0.0,
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
; Make sure load width gets reduced to i32 load.
-define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @s_lshr_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: s_lshr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0x14
@@ -404,11 +404,11 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = lshr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_lshr_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_lshr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -465,11 +465,11 @@ define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = lshr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
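For reference, the mechanical rewrite applied throughout these hunks is the standard opaque-pointer migration: the pointee type disappears from pointer arguments, while getelementptr, load, and store keep carrying an explicit element or value type. A minimal sketch of the pattern, using a hypothetical kernel rather than one of the tests above:

define amdgpu_kernel void @opaque_ptr_example(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  ; typed form was: %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
  ; typed form was: %a = load i32, i32 addrspace(1)* %b_ptr
  %a = load i32, ptr addrspace(1) %b_ptr
  ; typed form was: store i32 %a, i32 addrspace(1)* %out
  store i32 %a, ptr addrspace(1) %out
  ret void
}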