[llvm] da0293e - AMDGPU: Bulk update some r600 tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 28 08:25:50 PST 2022
Author: Matt Arsenault
Date: 2022-11-28T11:25:44-05:00
New Revision: da0293e3cc66803217e9331d95a89ab6b59cdaae
URL: https://github.com/llvm/llvm-project/commit/da0293e3cc66803217e9331d95a89ab6b59cdaae
DIFF: https://github.com/llvm/llvm-project/commit/da0293e3cc66803217e9331d95a89ab6b59cdaae.diff
LOG: AMDGPU: Bulk update some r600 tests to opaque pointers
r600.amdgpu-alias-analysis.ll has something strange going on where
AliasAnalysisEvaluator's printing reproduces the typed pointer
syntax.
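
The update itself is mechanical. As a minimal sketch (a hypothetical
kernel for illustration, not taken from any one test in this commit),
every typed pointer type becomes the opaque "ptr" form, keeping the
address space, and the pointee type survives only on the operations
that need it (load/store/getelementptr):

  ; Before: typed pointers carry the pointee type
  define amdgpu_kernel void @example(i32 addrspace(1)* %out) {
    store i32 0, i32 addrspace(1)* %out
    ret void
  }

  ; After: opaque pointers; the i32 now appears only on the store
  define amdgpu_kernel void @example(ptr addrspace(1) %out) {
    store i32 0, ptr addrspace(1) %out
    ret void
  }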
Added:
Modified:
llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
llvm/test/CodeGen/AMDGPU/big_alu.ll
llvm/test/CodeGen/AMDGPU/elf.r600.ll
llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
llvm/test/CodeGen/AMDGPU/pv.ll
llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
llvm/test/CodeGen/AMDGPU/r600.add.ll
llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
llvm/test/CodeGen/AMDGPU/r600.sub.ll
llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
llvm/test/CodeGen/AMDGPU/r600cfg.ll
llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
llvm/test/CodeGen/AMDGPU/set-dx10.ll
llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
llvm/test/CodeGen/AMDGPU/swizzle-export.ll
llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
index 34eb088b16f4..eb29e0ac8ec6 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
@@ -4,7 +4,7 @@
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_def(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_def:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -20,13 +20,13 @@ entry:
%1 = and i32 %z, %0
%2 = and i32 %y, %x
%3 = or i32 %1, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ch:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -41,13 +41,13 @@ entry:
%0 = xor i32 %y, %z
%1 = and i32 %x, %0
%2 = xor i32 %z, %1
- store i32 %2, i32 addrspace(1)* %out
+ store i32 %2, ptr addrspace(1) %out
ret void
}
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; R600-LABEL: bfi_sha256_ma:
; R600: ; %bb.0: ; %entry
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
@@ -64,7 +64,7 @@ entry:
%1 = or i32 %x, %z
%2 = and i32 %y, %1
%3 = or i32 %0, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -149,7 +149,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
%and1 = and i64 %not.a, %mask
%bitselect = or i64 %and0, %and1
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -176,7 +176,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -232,6 +232,6 @@ entry:
%or1 = or i64 %and0, %and1
%scalar.use = add i64 %or1, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 51387c8b79cb..999c56645f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -50,29 +50,29 @@ main_body:
%tmp43 = extractelement <4 x float> %reg7, i32 1
%tmp44 = extractelement <4 x float> %reg7, i32 2
%tmp45 = extractelement <4 x float> %reg7, i32 3
- %tmp46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %tmp46 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
%tmp47 = extractelement <4 x float> %tmp46, i32 0
- %tmp48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %tmp48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
%tmp49 = extractelement <4 x float> %tmp48, i32 1
- %tmp50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %tmp50 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
%tmp51 = extractelement <4 x float> %tmp50, i32 2
- %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
%tmp53 = extractelement <4 x float> %tmp52, i32 0
- %tmp54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %tmp54 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
%tmp55 = extractelement <4 x float> %tmp54, i32 0
- %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
%tmp57 = extractelement <4 x float> %tmp56, i32 1
- %tmp58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %tmp58 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
%tmp59 = extractelement <4 x float> %tmp58, i32 2
- %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
%tmp61 = extractelement <4 x float> %tmp60, i32 3
- %tmp62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %tmp62 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
%tmp63 = extractelement <4 x float> %tmp62, i32 0
- %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
%tmp65 = extractelement <4 x float> %tmp64, i32 1
- %tmp66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %tmp66 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
%tmp67 = extractelement <4 x float> %tmp66, i32 2
- %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp69 = extractelement <4 x float> %tmp68, i32 0
%tmp70 = fcmp oge float %tmp69, 3.500000e+00
%tmp71 = sext i1 %tmp70 to i32
@@ -80,7 +80,7 @@ main_body:
%tmp73 = bitcast float %tmp72 to i32
%tmp74 = icmp ne i32 %tmp73, 0
%. = select i1 %tmp74, float 0.000000e+00, float 0.000000e+00
- %tmp75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp75 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp76 = extractelement <4 x float> %tmp75, i32 0
%tmp77 = fcmp oge float %tmp76, 2.000000e+00
%tmp78 = sext i1 %tmp77 to i32
@@ -134,7 +134,7 @@ IF137: ; preds = %main_body
%tmp123 = insertelement <4 x float> %tmp122, float 0.000000e+00, i32 3
%tmp124 = call float @llvm.r600.dot4(<4 x float> %tmp119, <4 x float> %tmp123)
%tmp125 = fdiv float 1.000000e+00, %tmp124
- %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%tmp127 = extractelement <4 x float> %tmp126, i32 0
%tmp128 = fmul float %tmp127, %tmp125
%tmp129 = fmul float %tmp103, %tmp128
@@ -360,15 +360,15 @@ ENDIF136: ; preds = %ENDIF154, %main_bod
%tmp333 = fmul float %tmp318, %tmp332
%tmp334 = fmul float %tmp320, %tmp332
%tmp335 = fmul float %tmp322, %tmp332
- %tmp336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %tmp336 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
%tmp337 = extractelement <4 x float> %tmp336, i32 0
%tmp338 = fsub float -0.000000e+00, %tmp337
%tmp339 = fadd float 1.000000e+00, %tmp338
- %tmp340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %tmp340 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%tmp341 = extractelement <4 x float> %tmp340, i32 0
%tmp342 = fsub float -0.000000e+00, %tmp341
%tmp343 = fadd float 1.000000e+00, %tmp342
- %tmp344 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %tmp344 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
%tmp345 = extractelement <4 x float> %tmp344, i32 0
%tmp346 = fsub float -0.000000e+00, %tmp345
%tmp347 = fadd float 1.000000e+00, %tmp346
@@ -1136,7 +1136,7 @@ ENDIF175: ; preds = %IF176, %ENDIF172
%temp92.11 = phi float [ %tmp856, %IF176 ], [ %temp92.10, %ENDIF172 ]
%temp93.5 = phi float [ %tmp857, %IF176 ], [ %temp93.4, %ENDIF172 ]
%temp94.5 = phi float [ %tmp858, %IF176 ], [ %temp94.4, %ENDIF172 ]
- %tmp859 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %tmp859 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
%tmp860 = extractelement <4 x float> %tmp859, i32 0
%tmp861 = fcmp olt float %tmp860, %tmp181
%tmp862 = sext i1 %tmp861 to i32
@@ -1257,12 +1257,12 @@ ENDIF178: ; preds = %IF179, %ENDIF175
%tmp931 = fmul float %temp87.6, %tmp927
%tmp932 = fmul float %tmp2, -2.000000e+00
%tmp933 = fadd float %tmp932, 1.000000e+00
- %tmp934 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %tmp934 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 23)
%tmp935 = extractelement <4 x float> %tmp934, i32 2
%tmp936 = fsub float -0.000000e+00, %tmp935
%tmp937 = fadd float %tmp933, %tmp936
%tmp938 = fdiv float 1.000000e+00, %tmp937
- %tmp939 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+ %tmp939 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 24)
%tmp940 = extractelement <4 x float> %tmp939, i32 2
%tmp941 = fmul float %tmp940, %tmp938
%tmp942 = fsub float -0.000000e+00, %tmp53
diff --git a/llvm/test/CodeGen/AMDGPU/elf.r600.ll b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
index 276310724630..503d518d1ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/elf.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
@@ -9,9 +9,9 @@
; CONFIG-NEXT: .long 2
; CONFIG-NEXT: .long 165900
; CONFIG-NEXT: .long 0
-define amdgpu_kernel void @test(float addrspace(1)* %out, i32 %p) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %p) {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
- store float %r, float addrspace(1)* %out
+ store float %r, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index 5cb0c616d15f..bb7257eb824a 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
define amdgpu_ps void @fetch_limits_r600() {
entry:
- %tmp = load <4 x float>, <4 x float> addrspace(8)* null
- %tmp1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %tmp3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %tmp4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %tmp6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %tmp7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %tmp = load <4 x float>, ptr addrspace(8) null
+ %tmp1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+ %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+ %tmp3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+ %tmp4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+ %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+ %tmp6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+ %tmp7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+ %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
%tmp9 = shufflevector <4 x float> %tmp, <4 x float> %tmp, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp10 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
%tmp11 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index d8f7c0daa8de..e0afdc8c6ba7 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
define amdgpu_ps void @fetch_limits_r700() {
entry:
- %0 = load <4 x float>, <4 x float> addrspace(8)* null
- %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
- %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
- %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
- %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
- %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
- %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %0 = load <4 x float>, ptr addrspace(8) null
+ %1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+ %2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+ %3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+ %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+ %5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+ %6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+ %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+ %8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
+ %9 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
+ %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
+ %11 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
+ %12 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
+ %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 13)
+ %14 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
+ %15 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 15)
+ %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
%17 = shufflevector <4 x float> %0, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%18 = call <4 x float> @llvm.r600.tex(<4 x float> %17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
%19 = shufflevector <4 x float> %1, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
index 71bb4afa64ef..047e27be787e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -3,9 +3,9 @@
; EG-LABEL: {{^}}test_fmax_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
%val = call float @llvm.maxnum.f32(float %a, float %b)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, flo
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
- store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
}
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
- store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
}
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
- store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
}
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
- store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
}
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344(nan)
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MAX_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmax_var_immediate_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 * [[OUT]]
-define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.maxnum.f32(float %a, float 2.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmax_immediate_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.maxnum.f32(float 2.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmax_var_literal_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.maxnum.f32(float %a, float 99.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmax_literal_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.maxnum.f32(float 99.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
index 713e95c7f46e..8d85af091f53 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -3,9 +3,9 @@
; EG-LABEL: {{^}}test_fmin_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
%val = call float @llvm.minnum.f32(float %a, float %b)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, flo
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
- store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+ store <2 x float> %val, ptr addrspace(1) %out, align 8
ret void
}
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
- store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+ store <4 x float> %val, ptr addrspace(1) %out, align 16
ret void
}
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
- store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+ store <8 x float> %val, ptr addrspace(1) %out, align 32
ret void
}
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
- store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+ store <16 x float> %val, ptr addrspace(1) %out, align 64
ret void
}
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %o
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG-NOT: MIN_DX10
; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmin_var_immediate_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.minnum.f32(float %a, float 2.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmin_immediate_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.minnum.f32(float 2.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmin_var_literal_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.minnum.f32(float %a, float 99.0)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}fmin_literal_var_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
%val = call float @llvm.minnum.f32(float 99.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
+ store float %val, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
index b5a0de95acf5..9781e3fc828f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -7,15 +7,15 @@
; CHECK: CUBE * T{{[0-9]}}.W
define amdgpu_ps void @cube() {
main_body:
- %tmp = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp1 = extractelement <4 x float> %tmp, i32 3
- %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp3 = extractelement <4 x float> %tmp2, i32 0
%tmp4 = fdiv float %tmp3, %tmp1
- %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp6 = extractelement <4 x float> %tmp5, i32 1
%tmp7 = fdiv float %tmp6, %tmp1
- %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp9 = extractelement <4 x float> %tmp8, i32 2
%tmp10 = fdiv float %tmp9, %tmp1
%tmp11 = insertelement <4 x float> undef, float %tmp4, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
index de8a47741c94..21c8ea6c281f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
@@ -2,10 +2,10 @@
declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
-define amdgpu_kernel void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
- %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
- %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+define amdgpu_kernel void @test_dp4(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
+ %src0 = load <4 x float>, ptr addrspace(1) %a, align 16
+ %src1 = load <4 x float>, ptr addrspace(1) %b, align 16
%dp4 = call float @llvm.r600.dot4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
- store float %dp4, float addrspace(1)* %out, align 4
+ store float %dp4, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
index 93caafbb9524..7859bac7bc52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
@@ -2,18 +2,18 @@
; EG-LABEL: {{^}}test_group_barrier:
; EG: GROUP_BARRIER
-define amdgpu_kernel void @test_group_barrier(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_group_barrier(ptr addrspace(1) %out) #0 {
entry:
%tmp = call i32 @llvm.r600.read.tidig.x()
- %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
- store i32 %tmp, i32 addrspace(1)* %tmp1
+ %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
+ store i32 %tmp, ptr addrspace(1) %tmp1
call void @llvm.r600.group.barrier()
%tmp2 = call i32 @llvm.r600.read.local.size.x()
%tmp3 = sub i32 %tmp2, 1
%tmp4 = sub i32 %tmp3, %tmp
- %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4
- %tmp6 = load i32, i32 addrspace(1)* %tmp5
- store i32 %tmp6, i32 addrspace(1)* %tmp1
+ %tmp5 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp4
+ %tmp6 = load i32, ptr addrspace(1) %tmp5
+ store i32 %tmp6, ptr addrspace(1) %tmp1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
index 7f18fec3db6d..64f2fd26f1df 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -14,10 +14,10 @@
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -29,10 +29,10 @@ entry:
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -44,10 +44,10 @@ entry:
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -57,12 +57,12 @@ entry:
; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[X]], s[[Y]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
entry:
%x = call i32 @llvm.r600.read.local.size.x() #0
%y = call i32 @llvm.r600.read.local.size.y() #0
%val = mul i32 %x, %y
- store i32 %val, i32 addrspace(1)* %out
+ store i32 %val, ptr addrspace(1) %out
ret void
}
@@ -76,12 +76,12 @@ entry:
; GCN: s_mul_i32 [[VAL:s[0-9]+]], [[X]], [[Z]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
entry:
%x = call i32 @llvm.r600.read.local.size.x() #0
%z = call i32 @llvm.r600.read.local.size.z() #0
%val = mul i32 %x, %z
- store i32 %val, i32 addrspace(1)* %out
+ store i32 %val, ptr addrspace(1) %out
ret void
}
@@ -94,12 +94,12 @@ entry:
; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[#LOAD + 0]], s[[#LOAD + 1]]
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
entry:
%y = call i32 @llvm.r600.read.local.size.y() #0
%z = call i32 @llvm.r600.read.local.size.z() #0
%val = mul i32 %y, %z
- store i32 %val, i32 addrspace(1)* %out
+ store i32 %val, ptr addrspace(1) %out
ret void
}
@@ -115,14 +115,14 @@ entry:
; GCN: s_add_i32 [[VAL:s[0-9]+]], [[M]], s[[Z]]
; GCN-DAG: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
entry:
%x = call i32 @llvm.r600.read.local.size.x() #0
%y = call i32 @llvm.r600.read.local.size.y() #0
%z = call i32 @llvm.r600.read.local.size.z() #0
%xy = mul i32 %x, %y
%xyz = add i32 %xy, %z
- store i32 %xyz, i32 addrspace(1)* %out
+ store i32 %xyz, ptr addrspace(1) %out
ret void
}
@@ -132,12 +132,12 @@ entry:
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
entry:
%size = call i32 @llvm.r600.read.local.size.x() #0
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
- store i32 %shr, i32 addrspace(1)* %out
+ store i32 %shr, ptr addrspace(1) %out
ret void
}
@@ -147,12 +147,12 @@ entry:
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
entry:
%size = call i32 @llvm.r600.read.local.size.y() #0
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
- store i32 %shr, i32 addrspace(1)* %out
+ store i32 %shr, ptr addrspace(1) %out
ret void
}
@@ -162,12 +162,12 @@ entry:
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
entry:
%size = call i32 @llvm.r600.read.local.size.z() #0
%shl = shl i32 %size, 16
%shr = lshr i32 %shl, 16
- store i32 %shr, i32 addrspace(1)* %out
+ store i32 %shr, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
index 90d076d4fb4d..f2c25c79c780 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
@@ -4,8 +4,8 @@ declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
; EG-LABEL: {{^}}rsq_clamped_f32:
; EG: RECIPSQRT_CLAMPED
-define amdgpu_kernel void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @rsq_clamped_f32(ptr addrspace(1) %out, float %src) nounwind {
%rsq_clamped = call float @llvm.r600.recipsqrt.clamped.f32(float %src)
- store float %rsq_clamped, float addrspace(1)* %out, align 4
+ store float %rsq_clamped, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
index d9177b39b8ac..004d348dfbd2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
@@ -4,25 +4,25 @@ declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone
; EG-LABEL: {{^}}recipsqrt.ieee_f32:
; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32(ptr addrspace(1) %out, float %src) nounwind {
%recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float %src) nounwind readnone
- store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+ store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
ret void
}
; TODO: Really these should be constant folded
; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_4.0
; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
%recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 4.0) nounwind readnone
- store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+ store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
ret void
}
; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_100.0
; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
%recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 100.0) nounwind readnone
- store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+ store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
index 98044917e2b0..7ec4072f2622 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
@@ -17,9 +17,9 @@
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
-define amdgpu_kernel void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
bb:
- %addr = load <4 x float>, <4 x float> addrspace(1)* %in
+ %addr = load <4 x float>, ptr addrspace(1) %in
%tmp = shufflevector <4 x float> %addr, <4 x float> %addr, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp1 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -52,7 +52,7 @@ bb:
%tmp29 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
%tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1)
- store <4 x float> %tmp31, <4 x float> addrspace(1)* %out
+ store <4 x float> %tmp31, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
index c8f4e4c986a7..b0c8cc2810ff 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
@@ -15,19 +15,19 @@
; EG-NEXT: ALU clause
; EG: LDS_READ_RET
-define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
entry:
%y.i = call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
- store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %y.i
+ store i32 %y.i, ptr addrspace(3) %arrayidx, align 4
%add = add nsw i32 %y.i, 1
%cmp = icmp eq i32 %add, 16
%.add = select i1 %cmp, i32 0, i32 %add
call void @llvm.r600.group.barrier()
- %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
- %tmp = load i32, i32 addrspace(3)* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
- store i32 %tmp, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx1 = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %.add
+ %tmp = load i32, ptr addrspace(3) %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %y.i
+ store i32 %tmp, ptr addrspace(1) %arrayidx2, align 4
ret void
}
@@ -57,25 +57,25 @@ entry:
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) #0 {
entry:
%x.i = call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
- store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+ store i32 %x.i, ptr addrspace(3) %arrayidx, align 4
%mul = shl nsw i32 %x.i, 1
- %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
- store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+ store i32 %mul, ptr addrspace(3) %arrayidx1, align 4
%sub = sub nsw i32 3, %x.i
call void @llvm.r600.group.barrier()
- %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
- %tmp = load i32, i32 addrspace(3)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
- store i32 %tmp, i32 addrspace(1)* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
- %tmp1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
+ %arrayidx2 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+ %tmp = load i32, ptr addrspace(3) %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %x.i
+ store i32 %tmp, ptr addrspace(1) %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+ %tmp1 = load i32, ptr addrspace(3) %arrayidx4, align 4
%add = add nsw i32 %x.i, 4
- %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
- store i32 %tmp1, i32 addrspace(1)* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %add
+ store i32 %tmp1, ptr addrspace(1) %arrayidx5, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
index 5bb21cb75952..98a7bee2c6d6 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=CM %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s
-define amdgpu_kernel void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
; CM-LABEL: test_umul24_i32:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -38,12 +38,12 @@ entry:
%1 = shl i32 %b, 8
%b_24 = lshr i32 %1, 8
%2 = mul i32 %a_24, %b_24
- store i32 %2, i32 addrspace(1)* %out
+ store i32 %2, ptr addrspace(1) %out
ret void
}
; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16_sext(ptr addrspace(1) %out, i16 %a, i16 %b) {
; CM-LABEL: test_umul24_i16_sext:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 0, @10, KC0[], KC1[]
@@ -88,12 +88,12 @@ define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a,
entry:
%mul = mul i16 %a, %b
%ext = sext i16 %mul to i32
- store i32 %ext, i32 addrspace(1)* %out
+ store i32 %ext, ptr addrspace(1) %out
ret void
}
; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
+define amdgpu_kernel void @test_umul24_i8(ptr addrspace(1) %out, i8 %a, i8 %b) {
; CM-LABEL: test_umul24_i8:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 0, @10, KC0[], KC1[]
@@ -138,11 +138,11 @@ define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b)
entry:
%mul = mul i8 %a, %b
%ext = sext i8 %mul to i32
- store i32 %ext, i32 addrspace(1)* %out
+ store i32 %ext, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi24_i32_i64(ptr addrspace(1) %out, i32 %a, i32 %b) {
; CM-LABEL: test_umulhi24_i32_i64:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -175,11 +175,11 @@ entry:
%mul48 = mul i64 %a.24.i64, %b.24.i64
%mul48.hi = lshr i64 %mul48, 32
%mul24hi = trunc i64 %mul48.hi to i32
- store i32 %mul24hi, i32 addrspace(1)* %out
+ store i32 %mul24hi, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umulhi24(ptr addrspace(1) %out, i64 %a, i64 %b) {
; CM-LABEL: test_umulhi24:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -210,12 +210,12 @@ entry:
%mul48 = mul i64 %a.24, %b.24
%mul48.hi = lshr i64 %mul48, 32
%mul24.hi = trunc i64 %mul48.hi to i32
- store i32 %mul24.hi, i32 addrspace(1)* %out
+ store i32 %mul24.hi, ptr addrspace(1) %out
ret void
}
; Multiply with 24-bit inputs and 64-bit output.
-define amdgpu_kernel void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umul24_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
; CM-LABEL: test_umul24_i64:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
@@ -257,6 +257,6 @@ entry:
%tmp1 = shl i64 %b, 40
%b_24 = lshr i64 %tmp1, 40
%tmp2 = mul i64 %a_24, %b_24
- store i64 %tmp2, i64 addrspace(1)* %out
+ store i64 %tmp2, ptr addrspace(1) %out
ret void
}
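Note that only the IR side of these tests changes; the CM/EG CHECK lines in the hunks above are untouched, since the generated code never depended on the pointee type. On signatures, the effect is that every flavor of global pointer collapses to the same parameter type, sketched here with a hypothetical kernel @f:

  ; before: the pointee type distinguished the parameter
  ; define amdgpu_kernel void @f(i32 addrspace(1)* %out)
  ; after: all global pointers are spelled the same way
  define amdgpu_kernel void @f(ptr addrspace(1) %out) {
    ret void
  }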
diff --git a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
index 4685e54d07f9..5af9f78b471c 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
@@ -22,35 +22,35 @@ entry:
%c1 = alloca i32, align 4, addrspace(5)
%d1 = alloca i32, align 4, addrspace(5)
%data = alloca i32, align 4, addrspace(5)
- %0 = load i32, i32 addrspace(5)* %a0, align 4
- %1 = load i32, i32 addrspace(5)* %b0, align 4
+ %0 = load i32, ptr addrspace(5) %a0, align 4
+ %1 = load i32, ptr addrspace(5) %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %2 = load i32, i32 addrspace(5)* %c0, align 4
- %3 = load i32, i32 addrspace(5)* %d0, align 4
+ %2 = load i32, ptr addrspace(5) %c0, align 4
+ %3 = load i32, ptr addrspace(5) %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %land.lhs.true
- store i32 1, i32 addrspace(5)* %data, align 4
+ store i32 1, ptr addrspace(5) %data, align 4
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32, i32 addrspace(5)* %a1, align 4
- %5 = load i32, i32 addrspace(5)* %b1, align 4
+ %4 = load i32, ptr addrspace(5) %a1, align 4
+ %5 = load i32, ptr addrspace(5) %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.end6
land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32 addrspace(5)* %c1, align 4
- %7 = load i32, i32 addrspace(5)* %d1, align 4
+ %6 = load i32, ptr addrspace(5) %c1, align 4
+ %7 = load i32, ptr addrspace(5) %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.end6
if.then5: ; preds = %land.lhs.true3
- store i32 1, i32 addrspace(5)* %data, align 4
+ store i32 1, ptr addrspace(5) %data, align 4
br label %if.end6
if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end
diff --git a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
index 1d55c9ee074a..fc71e161ce69 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
@@ -23,14 +23,14 @@ entry:
%c1 = alloca i32, align 4, addrspace(5)
%d1 = alloca i32, align 4, addrspace(5)
%data = alloca i32, align 4, addrspace(5)
- %0 = load i32, i32 addrspace(5)* %a0, align 4
- %1 = load i32, i32 addrspace(5)* %b0, align 4
+ %0 = load i32, ptr addrspace(5) %a0, align 4
+ %1 = load i32, ptr addrspace(5) %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.else
land.lhs.true: ; preds = %entry
- %2 = load i32, i32 addrspace(5)* %c0, align 4
- %3 = load i32, i32 addrspace(5)* %d0, align 4
+ %2 = load i32, ptr addrspace(5) %c0, align 4
+ %3 = load i32, ptr addrspace(5) %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.else
@@ -38,18 +38,18 @@ if.then: ; preds = %land.lhs.true
br label %if.end
if.else: ; preds = %land.lhs.true, %entry
- store i32 1, i32 addrspace(5)* %data, align 4
+ store i32 1, ptr addrspace(5) %data, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
- %4 = load i32, i32 addrspace(5)* %a1, align 4
- %5 = load i32, i32 addrspace(5)* %b1, align 4
+ %4 = load i32, ptr addrspace(5) %a1, align 4
+ %5 = load i32, ptr addrspace(5) %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.else6
land.lhs.true3: ; preds = %if.end
- %6 = load i32, i32 addrspace(5)* %c1, align 4
- %7 = load i32, i32 addrspace(5)* %d1, align 4
+ %6 = load i32, ptr addrspace(5) %c1, align 4
+ %7 = load i32, ptr addrspace(5) %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.else6
@@ -57,7 +57,7 @@ if.then5: ; preds = %land.lhs.true3
br label %if.end7
if.else6: ; preds = %land.lhs.true3, %if.end
- store i32 1, i32 addrspace(5)* %data, align 4
+ store i32 1, ptr addrspace(5) %data, align 4
br label %if.end7
if.end7: ; preds = %if.else6, %if.then5
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 6c8f308bd65e..81e8dfcdb3a4 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -18,23 +18,22 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1
; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1
-define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
entry:
%stack = alloca [5 x i32], align 4, addrspace(5)
- %0 = load i32, i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
- store i32 4, i32 addrspace(5)* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
- %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
- store i32 5, i32 addrspace(5)* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
- %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
- store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
- %3 = load i32, i32 addrspace(5)* %arrayidx12
- %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
- store i32 %3, i32 addrspace(1)* %arrayidx13
+ %0 = load i32, ptr addrspace(1) %in, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %0
+ store i32 4, ptr addrspace(5) %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1
+ %1 = load i32, ptr addrspace(1) %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %1
+ store i32 5, ptr addrspace(5) %arrayidx3, align 4
+ %2 = load i32, ptr addrspace(5) %stack, align 4
+ store i32 %2, ptr addrspace(1) %out, align 4
+ %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 1
+ %3 = load i32, ptr addrspace(5) %arrayidx12
+ %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1
+ store i32 %3, ptr addrspace(1) %arrayidx13
ret void
}
@@ -49,24 +48,20 @@ entry:
; R600-NOT: MOVA_INT
%struct.point = type { i32, i32 }
-define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 {
entry:
%a = alloca %struct.point, addrspace(5)
%b = alloca %struct.point, addrspace(5)
- %a.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
- %a.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1
- %b.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
- %b.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1
- store i32 0, i32 addrspace(5)* %a.x.ptr
- store i32 1, i32 addrspace(5)* %a.y.ptr
- store i32 2, i32 addrspace(5)* %b.x.ptr
- store i32 3, i32 addrspace(5)* %b.y.ptr
- %a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
- %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr
- %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr
+ %a.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %a, i32 0, i32 1
+ %b.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %b, i32 0, i32 1
+ store i32 0, ptr addrspace(5) %a
+ store i32 1, ptr addrspace(5) %a.y.ptr
+ store i32 2, ptr addrspace(5) %b
+ store i32 3, ptr addrspace(5) %b.y.ptr
+ %a.indirect = load i32, ptr addrspace(5) %a
+ %b.indirect = load i32, ptr addrspace(5) %b
%0 = add i32 %a.indirect, %b.indirect
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -77,69 +72,63 @@ entry:
; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT
-define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
entry:
%prv_array_const = alloca [2 x i32], addrspace(5)
%prv_array = alloca [2 x i32], addrspace(5)
- %a = load i32, i32 addrspace(1)* %in
- %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
- %b = load i32, i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
- store i32 %a, i32 addrspace(5)* %a_dst_ptr
- %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1
- store i32 %b, i32 addrspace(5)* %b_dst_ptr
+ %a = load i32, ptr addrspace(1) %in
+ %b_src_ptr = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1
+ %b = load i32, ptr addrspace(1) %b_src_ptr
+ store i32 %a, ptr addrspace(5) %prv_array_const
+ %b_dst_ptr = getelementptr inbounds [2 x i32], ptr addrspace(5) %prv_array_const, i32 0, i32 1
+ store i32 %b, ptr addrspace(5) %b_dst_ptr
br label %for.body
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
- %x = load i32, i32 addrspace(5)* %x_ptr
- %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
- %y = load i32, i32 addrspace(5)* %y_ptr
+ %x = load i32, ptr addrspace(5) %prv_array_const
+ %y = load i32, ptr addrspace(5) %prv_array
%xy = add i32 %x, %y
- store i32 %xy, i32 addrspace(5)* %y_ptr
+ store i32 %xy, ptr addrspace(5) %prv_array
%count = add i32 %inc, 1
%done = icmp eq i32 %count, 4095
br i1 %done, label %for.end, label %for.body
for.end:
- %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
- %value = load i32, i32 addrspace(5)* %value_ptr
- store i32 %value, i32 addrspace(1)* %out
+ %value = load i32, ptr addrspace(5) %prv_array
+ store i32 %value, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}short_array:
; R600-VECT: MOVA_INT
-define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i16], addrspace(5)
- %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1
- store i16 0, i16 addrspace(5)* %1
- store i16 1, i16 addrspace(5)* %2
- %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index
- %4 = load i16, i16 addrspace(5)* %3
- %5 = sext i16 %4 to i32
- store i32 %5, i32 addrspace(1)* %out
+ %1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1
+ store i16 0, ptr addrspace(5) %0
+ store i16 1, ptr addrspace(5) %1
+ %2 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 %index
+ %3 = load i16, ptr addrspace(5) %2
+ %4 = sext i16 %3 to i32
+ store i32 %4, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}char_array:
; R600-VECT: MOVA_INT
-define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%0 = alloca [2 x i8], addrspace(5)
- %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1
- store i8 0, i8 addrspace(5)* %1
- store i8 1, i8 addrspace(5)* %2
- %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index
- %4 = load i8, i8 addrspace(5)* %3
- %5 = sext i8 %4 to i32
- store i32 %5, i32 addrspace(1)* %out
+ %1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1
+ store i8 0, ptr addrspace(5) %0
+ store i8 1, ptr addrspace(5) %1
+ %2 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 %index
+ %3 = load i8, ptr addrspace(5) %2
+ %4 = sext i8 %3 to i32
+ store i32 %4, ptr addrspace(1) %out
ret void
}
@@ -150,127 +139,120 @@ entry:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) #0 {
entry:
%0 = alloca [2 x i32], addrspace(5)
- %1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
- %2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
- store i32 0, i32 addrspace(5)* %1
- store i32 1, i32 addrspace(5)* %2
- %3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
- %4 = load i32, i32 addrspace(5)* %3
- %5 = call i32 @llvm.r600.read.tidig.x()
- %6 = add i32 %4, %5
- store i32 %6, i32 addrspace(1)* %out
+ %1 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
+ store i32 0, ptr addrspace(5) %0
+ store i32 1, ptr addrspace(5) %1
+ %2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in
+ %3 = load i32, ptr addrspace(5) %2
+ %4 = call i32 @llvm.r600.read.tidig.x()
+ %5 = add i32 %3, %4
+ store i32 %5, ptr addrspace(1) %out
ret void
}
; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
; FUNC-LABEL: {{^}}no_overlap:
-define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 {
entry:
%0 = alloca [3 x i8], align 1, addrspace(5)
%1 = alloca [2 x i8], align 1, addrspace(5)
- %2 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0
- %3 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1
- %4 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2
- %5 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0
- %6 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1
- store i8 0, i8 addrspace(5)* %2
- store i8 1, i8 addrspace(5)* %3
- store i8 2, i8 addrspace(5)* %4
- store i8 1, i8 addrspace(5)* %5
- store i8 0, i8 addrspace(5)* %6
- %7 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in
- %8 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in
- %9 = load i8, i8 addrspace(5)* %7
- %10 = load i8, i8 addrspace(5)* %8
- %11 = add i8 %9, %10
- %12 = sext i8 %11 to i32
- store i32 %12, i32 addrspace(1)* %out
+ %2 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 1
+ %3 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 2
+ %4 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 1
+ store i8 0, ptr addrspace(5) %0
+ store i8 1, ptr addrspace(5) %2
+ store i8 2, ptr addrspace(5) %3
+ store i8 1, ptr addrspace(5) %1
+ store i8 0, ptr addrspace(5) %4
+ %5 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 %in
+ %6 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 %in
+ %7 = load i8, ptr addrspace(5) %5
+ %8 = load i8, ptr addrspace(5) %6
+ %9 = add i8 %7, %8
+ %10 = sext i8 %9 to i32
+ store i32 %10, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i8]], addrspace(5)
- %gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
- store i8 0, i8 addrspace(5)* %gep0
- store i8 1, i8 addrspace(5)* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
- %load = load i8, i8 addrspace(5)* %gep2
+ %gep1 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+ store i8 0, ptr addrspace(5) %alloca
+ store i8 1, ptr addrspace(5) %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+ %load = load i8, ptr addrspace(5) %gep2
%sext = sext i8 %load to i32
- store i32 %sext, i32 addrspace(1)* %out
+ store i32 %sext, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i32]], addrspace(5)
- %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
- store i32 0, i32 addrspace(5)* %gep0
- store i32 1, i32 addrspace(5)* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
- %load = load i32, i32 addrspace(5)* %gep2
- store i32 %load, i32 addrspace(1)* %out
+ %gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+ store i32 0, ptr addrspace(5) %alloca
+ store i32 1, ptr addrspace(5) %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, ptr addrspace(5) %gep2
+ store i32 %load, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x i64]], addrspace(5)
- %gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
- store i64 0, i64 addrspace(5)* %gep0
- store i64 1, i64 addrspace(5)* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
- %load = load i64, i64 addrspace(5)* %gep2
- store i64 %load, i64 addrspace(1)* %out
+ %gep1 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+ store i64 0, ptr addrspace(5) %alloca
+ store i64 1, ptr addrspace(5) %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+ %load = load i64, ptr addrspace(5) %gep2
+ store i64 %load, ptr addrspace(1) %out
ret void
}
%struct.pair32 = type { i32, i32 }
-define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
- %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1
- store i32 0, i32 addrspace(5)* %gep0
- store i32 1, i32 addrspace(5)* %gep1
- %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32, i32 addrspace(5)* %gep2
- store i32 %load, i32 addrspace(1)* %out
+ %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1
+ %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1, i32 1
+ store i32 0, ptr addrspace(5) %gep0
+ store i32 1, ptr addrspace(5) %gep1
+ %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index, i32 0
+ %load = load i32, ptr addrspace(5) %gep2
+ store i32 %load, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 {
entry:
%alloca = alloca [2 x %struct.pair32], addrspace(5)
- %gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0
- store i32 0, i32 addrspace(5)* %gep0
- store i32 1, i32 addrspace(5)* %gep1
- %gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0
- %load = load i32, i32 addrspace(5)* %gep2
- store i32 %load, i32 addrspace(1)* %out
+ %gep0 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+ %gep1 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
+ store i32 0, ptr addrspace(5) %gep0
+ store i32 1, ptr addrspace(5) %gep1
+ %gep2 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 %index, i32 0
+ %load = load i32, ptr addrspace(5) %gep2
+ store i32 %load, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @select_private(ptr addrspace(1) %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32], addrspace(5)
- %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
- %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
- store i32 0, i32 addrspace(5)* %tmp1
- store i32 1, i32 addrspace(5)* %tmp2
+ %tmp2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
+ store i32 0, ptr addrspace(5) %tmp
+ store i32 1, ptr addrspace(5) %tmp2
%cmp = icmp eq i32 %in, 0
- %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2
- %load = load i32, i32 addrspace(5)* %sel
- store i32 %load, i32 addrspace(1)* %out
+ %sel = select i1 %cmp, ptr addrspace(5) %tmp, ptr addrspace(5) %tmp2
+ %load = load i32, ptr addrspace(5) %sel
+ store i32 %load, ptr addrspace(1) %out
ret void
}
@@ -281,16 +263,16 @@ entry:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
-define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%alloca = alloca [16 x i32], addrspace(5)
- %tmp0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
- store i32 5, i32 addrspace(5)* %tmp0
- %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32
+ %tmp0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
+ store i32 5, ptr addrspace(5) %tmp0
+ %tmp1 = ptrtoint ptr addrspace(5) %alloca to i32
%tmp2 = add i32 %tmp1, 5
- %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)*
- %tmp4 = getelementptr inbounds i32, i32 addrspace(5)* %tmp3, i32 %b
- %tmp5 = load i32, i32 addrspace(5)* %tmp4
- store i32 %tmp5, i32 addrspace(1)* %out
+ %tmp3 = inttoptr i32 %tmp2 to ptr addrspace(5)
+ %tmp4 = getelementptr inbounds i32, ptr addrspace(5) %tmp3, i32 %b
+ %tmp5 = load i32, ptr addrspace(5) %tmp4
+ store i32 %tmp5, ptr addrspace(1) %out
ret void
}
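One detail in the private-memory hunks above goes beyond the mechanical rewrite: getelementptr instructions whose indices are all zero are deleted, because once the pointee type is gone they no longer change the pointer value at all. A sketch of the equivalence, using the %stack alloca from mova_same_clause:

  ; before: an explicit GEP decayed the array to its first element
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
  %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
  ; after: an all-zero-index GEP is the identity, so the load takes %stack directly
  %2 = load i32, ptr addrspace(5) %stack, align 4

Dropping those GEPs removes names like %arrayidx10 and %gep0, which is why the unnamed value numbering shifts in several of the functions above.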
diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll
index 1474dbabba69..ed24e2f97c2c 100644
--- a/llvm/test/CodeGen/AMDGPU/pv.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv.ll
@@ -32,63 +32,63 @@ main_body:
%tmp37 = extractelement <4 x float> %reg7, i32 1
%tmp38 = extractelement <4 x float> %reg7, i32 2
%tmp39 = extractelement <4 x float> %reg7, i32 3
- %tmp40 = load <4 x float>, <4 x float> addrspace(8)* null
+ %tmp40 = load <4 x float>, ptr addrspace(8) null
%tmp41 = extractelement <4 x float> %tmp40, i32 0
%tmp42 = fmul float %tmp, %tmp41
- %tmp43 = load <4 x float>, <4 x float> addrspace(8)* null
+ %tmp43 = load <4 x float>, ptr addrspace(8) null
%tmp44 = extractelement <4 x float> %tmp43, i32 1
%tmp45 = fmul float %tmp, %tmp44
- %tmp46 = load <4 x float>, <4 x float> addrspace(8)* null
+ %tmp46 = load <4 x float>, ptr addrspace(8) null
%tmp47 = extractelement <4 x float> %tmp46, i32 2
%tmp48 = fmul float %tmp, %tmp47
- %tmp49 = load <4 x float>, <4 x float> addrspace(8)* null
+ %tmp49 = load <4 x float>, ptr addrspace(8) null
%tmp50 = extractelement <4 x float> %tmp49, i32 3
%tmp51 = fmul float %tmp, %tmp50
- %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%tmp53 = extractelement <4 x float> %tmp52, i32 0
%tmp54 = fmul float %tmp13, %tmp53
%tmp55 = fadd float %tmp54, %tmp42
- %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%tmp57 = extractelement <4 x float> %tmp56, i32 1
%tmp58 = fmul float %tmp13, %tmp57
%tmp59 = fadd float %tmp58, %tmp45
- %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%tmp61 = extractelement <4 x float> %tmp60, i32 2
%tmp62 = fmul float %tmp13, %tmp61
%tmp63 = fadd float %tmp62, %tmp48
- %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%tmp65 = extractelement <4 x float> %tmp64, i32 3
%tmp66 = fmul float %tmp13, %tmp65
%tmp67 = fadd float %tmp66, %tmp51
- %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%tmp69 = extractelement <4 x float> %tmp68, i32 0
%tmp70 = fmul float %tmp14, %tmp69
%tmp71 = fadd float %tmp70, %tmp55
- %tmp72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %tmp72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%tmp73 = extractelement <4 x float> %tmp72, i32 1
%tmp74 = fmul float %tmp14, %tmp73
%tmp75 = fadd float %tmp74, %tmp59
- %tmp76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %tmp76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%tmp77 = extractelement <4 x float> %tmp76, i32 2
%tmp78 = fmul float %tmp14, %tmp77
%tmp79 = fadd float %tmp78, %tmp63
- %tmp80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %tmp80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%tmp81 = extractelement <4 x float> %tmp80, i32 3
%tmp82 = fmul float %tmp14, %tmp81
%tmp83 = fadd float %tmp82, %tmp67
- %tmp84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %tmp84 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%tmp85 = extractelement <4 x float> %tmp84, i32 0
%tmp86 = fmul float %tmp15, %tmp85
%tmp87 = fadd float %tmp86, %tmp71
- %tmp88 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %tmp88 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%tmp89 = extractelement <4 x float> %tmp88, i32 1
%tmp90 = fmul float %tmp15, %tmp89
%tmp91 = fadd float %tmp90, %tmp75
- %tmp92 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %tmp92 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%tmp93 = extractelement <4 x float> %tmp92, i32 2
%tmp94 = fmul float %tmp15, %tmp93
%tmp95 = fadd float %tmp94, %tmp79
- %tmp96 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %tmp96 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%tmp97 = extractelement <4 x float> %tmp96, i32 3
%tmp98 = fmul float %tmp15, %tmp97
%tmp99 = fadd float %tmp98, %tmp83
@@ -106,15 +106,15 @@ main_body:
%tmp111 = fmul float %tmp16, %tmp110
%tmp112 = fmul float %tmp17, %tmp110
%tmp113 = fmul float %tmp18, %tmp110
- %tmp114 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %tmp114 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%tmp115 = extractelement <4 x float> %tmp114, i32 0
%tmp116 = fmul float %tmp115, %tmp20
%tmp117 = fadd float %tmp116, %tmp32
- %tmp118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %tmp118 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%tmp119 = extractelement <4 x float> %tmp118, i32 1
%tmp120 = fmul float %tmp119, %tmp21
%tmp121 = fadd float %tmp120, %tmp33
- %tmp122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %tmp122 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%tmp123 = extractelement <4 x float> %tmp122, i32 2
%tmp124 = fmul float %tmp123, %tmp22
%tmp125 = fadd float %tmp124, %tmp34
@@ -126,11 +126,11 @@ main_body:
%clamp.i10 = call float @llvm.minnum.f32(float %max.0.i9, float 1.000000e+00)
%max.0.i7 = call float @llvm.maxnum.f32(float %tmp27, float 0.000000e+00)
%clamp.i8 = call float @llvm.minnum.f32(float %max.0.i7, float 1.000000e+00)
- %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%tmp127 = extractelement <4 x float> %tmp126, i32 0
- %tmp128 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %tmp128 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%tmp129 = extractelement <4 x float> %tmp128, i32 1
- %tmp130 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %tmp130 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%tmp131 = extractelement <4 x float> %tmp130, i32 2
%tmp132 = insertelement <4 x float> undef, float %tmp111, i32 0
%tmp133 = insertelement <4 x float> %tmp132, float %tmp112, i32 1
@@ -141,11 +141,11 @@ main_body:
%tmp138 = insertelement <4 x float> %tmp137, float %tmp131, i32 2
%tmp139 = insertelement <4 x float> %tmp138, float 0.000000e+00, i32 3
%tmp140 = call float @llvm.r600.dot4(<4 x float> %tmp135, <4 x float> %tmp139)
- %tmp141 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %tmp141 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%tmp142 = extractelement <4 x float> %tmp141, i32 0
- %tmp143 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %tmp143 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%tmp144 = extractelement <4 x float> %tmp143, i32 1
- %tmp145 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %tmp145 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%tmp146 = extractelement <4 x float> %tmp145, i32 2
%tmp147 = insertelement <4 x float> undef, float %tmp111, i32 0
%tmp148 = insertelement <4 x float> %tmp147, float %tmp112, i32 1
@@ -156,31 +156,31 @@ main_body:
%tmp153 = insertelement <4 x float> %tmp152, float %tmp146, i32 2
%tmp154 = insertelement <4 x float> %tmp153, float 0.000000e+00, i32 3
%tmp155 = call float @llvm.r600.dot4(<4 x float> %tmp150, <4 x float> %tmp154)
- %tmp156 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %tmp156 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
%tmp157 = extractelement <4 x float> %tmp156, i32 0
%tmp158 = fmul float %tmp157, %tmp20
- %tmp159 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %tmp159 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
%tmp160 = extractelement <4 x float> %tmp159, i32 1
%tmp161 = fmul float %tmp160, %tmp21
- %tmp162 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %tmp162 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
%tmp163 = extractelement <4 x float> %tmp162, i32 2
%tmp164 = fmul float %tmp163, %tmp22
- %tmp165 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp165 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp166 = extractelement <4 x float> %tmp165, i32 0
%tmp167 = fmul float %tmp166, %tmp24
- %tmp168 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp168 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp169 = extractelement <4 x float> %tmp168, i32 1
%tmp170 = fmul float %tmp169, %tmp25
- %tmp171 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %tmp171 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
%tmp172 = extractelement <4 x float> %tmp171, i32 2
%tmp173 = fmul float %tmp172, %tmp26
- %tmp174 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %tmp174 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
%tmp175 = extractelement <4 x float> %tmp174, i32 0
%tmp176 = fmul float %tmp175, %tmp28
- %tmp177 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %tmp177 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
%tmp178 = extractelement <4 x float> %tmp177, i32 1
%tmp179 = fmul float %tmp178, %tmp29
- %tmp180 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %tmp180 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
%tmp181 = extractelement <4 x float> %tmp180, i32 2
%tmp182 = fmul float %tmp181, %tmp30
%tmp183 = fcmp uge float %tmp140, 0.000000e+00
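The constant expressions in pv.ll show the same rule applied recursively: the pointer operand's type is erased, while the GEP keeps its explicit source element type. Lifted from the hunk above:

  ; before
  load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  ; after
  load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)

Unlike the all-zero cases, this GEP really does offset the pointer (the trailing i32 1 selects an element), so it survives the conversion intact.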
diff --git a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
index 2984c280b00a..e6c068f5c5b1 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
@@ -17,11 +17,11 @@
; CHECK: Other: 0
; CHECK: Section: .text (0x2)
; CHECK: }
-define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @test_constant_array_fixup(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
entry:
- %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx
- %val = load i32, i32 addrspace(4)* %arrayidx
- store i32 %val, i32 addrspace(1)* %out, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(4) @arr, i32 0, i32 %idx
+ %val = load i32, ptr addrspace(4) %arrayidx
+ store i32 %val, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
index 4dafe7d09e9b..39a9413e5dda 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
@@ -47,83 +47,83 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%5 = extractelement <4 x float> %4, i32 0
%6 = fmul float %5, %0
- %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%8 = extractelement <4 x float> %7, i32 1
%9 = fmul float %8, %0
- %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%11 = extractelement <4 x float> %10, i32 2
%12 = fmul float %11, %0
- %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
%14 = extractelement <4 x float> %13, i32 3
%15 = fmul float %14, %0
- %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%17 = extractelement <4 x float> %16, i32 0
%18 = fmul float %17, %1
%19 = fadd float %18, %6
- %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %20 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%21 = extractelement <4 x float> %20, i32 1
%22 = fmul float %21, %1
%23 = fadd float %22, %9
- %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %24 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%25 = extractelement <4 x float> %24, i32 2
%26 = fmul float %25, %1
%27 = fadd float %26, %12
- %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %28 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
%29 = extractelement <4 x float> %28, i32 3
%30 = fmul float %29, %1
%31 = fadd float %30, %15
- %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %32 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float %33, %2
%35 = fadd float %34, %19
- %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %36 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
%37 = extractelement <4 x float> %36, i32 1
%38 = fmul float %37, %2
%39 = fadd float %38, %23
- %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %40 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
%41 = extractelement <4 x float> %40, i32 2
%42 = fmul float %41, %2
%43 = fadd float %42, %27
- %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %44 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
%45 = extractelement <4 x float> %44, i32 3
%46 = fmul float %45, %2
%47 = fadd float %46, %31
- %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%49 = extractelement <4 x float> %48, i32 0
%50 = fmul float %49, %3
%51 = fadd float %50, %35
- %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%53 = extractelement <4 x float> %52, i32 1
%54 = fmul float %53, %3
%55 = fadd float %54, %39
- %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%57 = extractelement <4 x float> %56, i32 2
%58 = fmul float %57, %3
%59 = fadd float %58, %43
- %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
%61 = extractelement <4 x float> %60, i32 3
%62 = fmul float %61, %3
%63 = fadd float %62, %47
- %64 = load <4 x float>, <4 x float> addrspace(8)* null
+ %64 = load <4 x float>, ptr addrspace(8) null
%65 = extractelement <4 x float> %64, i32 0
- %66 = load <4 x float>, <4 x float> addrspace(8)* null
+ %66 = load <4 x float>, ptr addrspace(8) null
%67 = extractelement <4 x float> %66, i32 1
- %68 = load <4 x float>, <4 x float> addrspace(8)* null
+ %68 = load <4 x float>, ptr addrspace(8) null
%69 = extractelement <4 x float> %68, i32 2
- %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %70 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%71 = extractelement <4 x float> %70, i32 0
- %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%73 = extractelement <4 x float> %72, i32 1
- %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %74 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
%75 = extractelement <4 x float> %74, i32 2
- %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%77 = extractelement <4 x float> %76, i32 0
- %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %78 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%79 = extractelement <4 x float> %78, i32 1
- %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
%81 = extractelement <4 x float> %80, i32 2
%82 = insertelement <4 x float> undef, float %51, i32 0
%83 = insertelement <4 x float> %82, float %55, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
index f0604c7fe782..f3a785db0b2b 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=r600 -mcpu=cypress -start-after safe-stack | FileCheck %s
; Don't crash
-define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out) {
; CHECK-LABEL: test:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: ALU 4, @6, KC0[CB0:0-32], KC1[]
@@ -23,13 +23,13 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y,
; CHECK-NEXT: 4(5.605194e-45), 2(2.802597e-45)
bb:
- store i64 2, i64 addrspace(1)* %out
- %tmp = load i64, i64 addrspace(1)* %out
+ store i64 2, ptr addrspace(1) %out
+ %tmp = load i64, ptr addrspace(1) %out
br label %jump
jump: ; preds = %bb
%tmp1 = icmp ugt i64 %tmp, 4
%umax = select i1 %tmp1, i64 %tmp, i64 4
- store i64 %umax, i64 addrspace(1)* %out
+ store i64 %umax, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.add.ll b/llvm/test/CodeGen/AMDGPU/r600.add.ll
index 73eea3ef2177..a4a7c3d86d80 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.add.ll
@@ -2,24 +2,24 @@
; FUNC-LABEL: {{^}}s_add_i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+ %a = load i32, ptr addrspace(1) %in
+ %b = load i32, ptr addrspace(1) %b_ptr
%result = add i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}s_add_v2i32:
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = add <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -28,12 +28,12 @@ define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = add <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -46,10 +46,10 @@ define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
-define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
+define amdgpu_kernel void @s_add_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b) {
entry:
%0 = add <8 x i32> %a, %b
- store <8 x i32> %0, <8 x i32> addrspace(1)* %out
+ store <8 x i32> %0, ptr addrspace(1) %out
ret void
}
@@ -70,33 +70,33 @@ entry:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
-define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
+define amdgpu_kernel void @s_add_v16i32(ptr addrspace(1) %out, <16 x i32> %a, <16 x i32> %b) {
entry:
%0 = add <16 x i32> %a, %b
- store <16 x i32> %0, <16 x i32> addrspace(1)* %out
+ store <16 x i32> %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_add_i32:
-define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
- %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
- %a = load volatile i32, i32 addrspace(1)* %gep
- %b = load volatile i32, i32 addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+ %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
+ %a = load volatile i32, ptr addrspace(1) %gep
+ %b = load volatile i32, ptr addrspace(1) %b_ptr
%result = add i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_add_imm_i32:
-define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x()
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
- %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
- %a = load volatile i32, i32 addrspace(1)* %gep
+ %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+ %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
+ %a = load volatile i32, ptr addrspace(1) %gep
%result = add i32 %a, 123
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
@@ -107,10 +107,10 @@ define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
-define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @add64(ptr addrspace(1) %out, i64 %a, i64 %b) {
entry:
%add = add i64 %a, %b
- store i64 %add, i64 addrspace(1)* %out
+ store i64 %add, ptr addrspace(1) %out
ret void
}
@@ -126,11 +126,11 @@ entry:
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
-define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @add64_sgpr_vgpr(ptr addrspace(1) %out, i64 %a, ptr addrspace(1) %in) {
entry:
- %0 = load i64, i64 addrspace(1)* %in
+ %0 = load i64, ptr addrspace(1) %in
%1 = add i64 %a, %0
- store i64 %1, i64 addrspace(1)* %out
+ store i64 %1, ptr addrspace(1) %out
ret void
}
@@ -142,13 +142,13 @@ entry:
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
-define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
br i1 %0, label %if, label %else
if:
- %1 = load i64, i64 addrspace(1)* %in
+ %1 = load i64, ptr addrspace(1) %in
br label %endif
else:
@@ -157,7 +157,7 @@ else:
endif:
%3 = phi i64 [%1, %if], [%2, %else]
- store i64 %3, i64 addrspace(1)* %out
+ store i64 %3, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
index 6caa6173e7a0..acac5c321dfd 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
@@ -6,10 +6,10 @@
%struct.foo = type {i32, i32, i32}
-define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo addrspace(5)* %in, i32 %offset) {
+define amdgpu_kernel void @alu_limits(ptr addrspace(1) %out, ptr addrspace(5) %in, i32 %offset) {
entry:
- %ptr = getelementptr inbounds %struct.foo, %struct.foo addrspace(5)* %in, i32 1, i32 2
- %x = load i32, i32 addrspace(5)*%ptr, align 4
+ %ptr = getelementptr inbounds %struct.foo, ptr addrspace(5) %in, i32 1, i32 2
+ %x = load i32, ptr addrspace(5) %ptr, align 4
br label %loop
loop:
%i = phi i32 [ 100, %entry ], [ %nexti, %loop ]
@@ -24,6 +24,6 @@ loop:
br i1 %cond, label %loop, label %end
end:
%out_val = add i32 %nextval, 4
- store i32 %out_val, i32 addrspace(1)* %out, align 4
+ store i32 %out_val, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
index a07c0a5d5422..66e38f59f44b 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
@@ -1,8 +1,8 @@
; RUN: opt -mtriple=r600-- -passes='default<O3>,aa-eval' -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1
-define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) {
- load volatile i8, i8 addrspace(5)* %p
- load volatile i8, i8 addrspace(999)* %p1
+define amdgpu_kernel void @test(ptr addrspace(5) %p, ptr addrspace(999) %p1) {
+ load volatile i8, ptr addrspace(5) %p
+ load volatile i8, ptr addrspace(999) %p1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
index 20de997e1bce..e7674ce6d88d 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
@@ -3,7 +3,7 @@
; This test just checks that the compiler doesn't crash.
-define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; EG-LABEL: i8ptr_v16i8ptr:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -20,13 +20,12 @@ define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrs
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
- %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
- %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
- store <16 x i8> %1, <16 x i8> addrspace(1)* %out
+ %0 = load <16 x i8>, ptr addrspace(1) %in
+ store <16 x i8> %0, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: f32_to_v2i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -42,13 +41,13 @@ define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addr
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load float, float addrspace(1)* %in, align 4
+ %load = load float, ptr addrspace(1) %in, align 4
%bc = bitcast float %load to <2 x i16>
- store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
+ store <2 x i16> %bc, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v2i16_to_f32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -64,13 +63,13 @@ define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addr
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
+ %load = load <2 x i16>, ptr addrspace(1) %in, align 4
%bc = bitcast <2 x i16> %load to float
- store float %bc, float addrspace(1)* %out, align 4
+ store float %bc, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v4i8_to_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -86,13 +85,13 @@ define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspac
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, ptr addrspace(1) %in, align 4
%bc = bitcast <4 x i8> %load to i32
- store i32 %bc, i32 addrspace(1)* %out, align 4
+ store i32 %bc, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: i32_to_v4i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -108,13 +107,13 @@ define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspac
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
%bc = bitcast i32 %load to <4 x i8>
- store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
+ store <4 x i8> %bc, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v2i16_to_v4i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -130,16 +129,16 @@ define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16>
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
+ %load = load <2 x i16>, ptr addrspace(1) %in, align 4
%bc = bitcast <2 x i16> %load to <4 x i8>
- store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
+ store <4 x i8> %bc, ptr addrspace(1) %out, align 4
ret void
}
; This just checks for crash in BUILD_VECTOR/EXTRACT_ELEMENT combine
; the stack manipulation is tricky to follow
; TODO: This should only use one load
-define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i16_extract_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; EG-LABEL: v4i16_extract_i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
@@ -172,14 +171,14 @@ define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> add
; EG-NEXT: MOV * T5.Z, 0.0,
; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 2
+ %load = load <4 x i16>, ptr addrspace(1) %in, align 2
%bc = bitcast <4 x i16> %load to <8 x i8>
%element = extractelement <8 x i8> %bc, i32 5
- store i8 %element, i8 addrspace(1)* %out
+ store i8 %element, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; EG-LABEL: bitcast_v2i32_to_f64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -195,9 +194,9 @@ define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, ptr addrspace(1) %in, align 8
%bc = bitcast <2 x i32> %val to double
- store double %bc, double addrspace(1)* %out, align 8
+ store double %bc, ptr addrspace(1) %out, align 8
ret void
}
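
In i8ptr_v16i8ptr above, the update is not purely mechanical: once all pointers in an address space share one type, a pointer-to-pointer bitcast carries no information, so the cast is deleted and the load takes %in directly. A sketch of the folding (illustrative, using the names from that test):

; old: a bitcast reinterpreted the pointee type before the load
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
%1 = load <16 x i8>, <16 x i8> addrspace(1)* %0

; new: the load itself types the access, so the bitcast is a no-op
%0 = load <16 x i8>, ptr addrspace(1) %in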
diff --git a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
index 71af6a9a4f51..9f2cf98dc3ef 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
@@ -16,7 +16,7 @@
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;
-define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_a0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -41,11 +41,11 @@ define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_a1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
@@ -87,11 +87,11 @@ define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits,
%onebit = shl i32 1, %conv
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_a4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -116,7 +116,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %val, %mask ; swapped order
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
@@ -124,7 +124,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;
-define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_b0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -149,11 +149,11 @@ define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_b1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
@@ -195,11 +195,11 @@ define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits,
%notmask = shl i32 -1, %conv
%mask = xor i32 %notmask, -1
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_b4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -224,7 +224,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
%masked = and i32 %val, %mask ; swapped order
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
@@ -232,7 +232,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;
-define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_c0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -257,11 +257,11 @@ define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_c1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
@@ -311,11 +311,11 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 add
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
%masked = and i32 %mask, %val
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_c4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -340,7 +340,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
%masked = and i32 %val, %mask ; swapped order
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
@@ -348,7 +348,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;
-define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_d0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -373,11 +373,11 @@ define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)
%numhighbits = sub i32 32, %numlowbits
%highbitscleared = shl i32 %val, %numhighbits
%masked = lshr i32 %highbitscleared, %numhighbits
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) {
; EG-LABEL: bzhi32_d1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
@@ -425,6 +425,6 @@ define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 add
%sh_prom = zext i8 %numhighbits to i32
%highbitscleared = shl i32 %val, %sh_prom
%masked = lshr i32 %highbitscleared, %sh_prom
- store i32 %masked, i32 addrspace(1)* %out
+ store i32 %masked, ptr addrspace(1) %out
ret void
}
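
As a cross-check on the four mask patterns in this file, a worked example with numlowbits = 5 (value chosen purely for illustration):

; a: (1 << 5) - 1                      = 0x1f
; b: (-1 << 5) xor -1 = ~0xffffffe0    = 0x1f
; c: -1 lshr (32 - 5) = 0xffffffff u>> 27 = 0x1f
; patterns a-c materialize the mask and AND it with %val;
; d: (%val shl 27) lshr 27 clears the high 27 bits of %val directly,
;    giving the same low-5-bit result without building a mask.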
diff --git a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
index 3d2f1b4fb9f4..1963574c9494 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
@@ -6,20 +6,20 @@
; FUNC-LABEL: {{^}}atomic_add_i32_offset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_add_i32_soffset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_soffset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
@@ -27,516 +27,516 @@ entry:
; FIXME: looks like the offset is wrong
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_huge_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_add_i32:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_and_i32_offset:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_and_i32:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_i32:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_max_i32_offset:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_max_i32:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile max ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile max ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umax_i32:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umax ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile umax ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_min_i32_offset:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_min_i32:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile min ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile min ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umin_i32:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile umin ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile umin ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_or_i32_offset:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_or_i32:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xchg_i32:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32(ptr addrspace(1) %out, i32 %in, i32 %old) {
entry:
- %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index, i32 %old) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in seq_cst seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_offset(ptr addrspace(1) %out, i32 %in) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xor_i32:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32(ptr addrspace(1) %out, i32 %in) {
entry:
- %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+ %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_store_i32_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, ptr addrspace(1) %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ store atomic i32 %in, ptr addrspace(1) %gep seq_cst, align 4
ret void
}
; FUNC-LABEL: {{^}}atomic_store_i32:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32(i32 %in, ptr addrspace(1) %out) {
entry:
- store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
+ store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4
ret void
}
; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, ptr addrspace(1) %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
- store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+ store atomic i32 %in, ptr addrspace(1) %gep seq_cst, align 4
ret void
}
; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, ptr addrspace(1) %out, i64 %index) {
entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
- store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+ store atomic i32 %in, ptr addrspace(1) %ptr seq_cst, align 4
ret void
}
; FUNC-LABEL: {{^}}atomic_add_1
; EG: MEM_RAT ATOMIC_ADD
-define amdgpu_kernel void @atomic_add_1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_add_1(ptr addrspace(1) %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_add_neg1
; EG: MEM_RAT ATOMIC_ADD
-define amdgpu_kernel void @atomic_add_neg1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_add_neg1(ptr addrspace(1) %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 -1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_neg1
; EG: MEM_RAT ATOMIC_SUB
-define amdgpu_kernel void @atomic_sub_neg1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_sub_neg1(ptr addrspace(1) %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 -1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}atomic_sub_1
; EG: MEM_RAT ATOMIC_SUB
-define amdgpu_kernel void @atomic_sub_1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_sub_1(ptr addrspace(1) %out) {
entry:
- %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
- %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst
+ %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+ %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 seq_cst
ret void
}
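
The atomic instructions throughout this file follow the same operand rewrite: the i32 value operands are untouched and only the pointer operand changes. A minimal sketch (illustrative names, not from any test here):

; old
%v = atomicrmw volatile add i32 addrspace(1)* %p, i32 %in seq_cst
%r = cmpxchg volatile i32 addrspace(1)* %p, i32 %old, i32 %in seq_cst seq_cst
; new
%v = atomicrmw volatile add ptr addrspace(1) %p, i32 %in seq_cst
%r = cmpxchg volatile ptr addrspace(1) %p, i32 %old, i32 %in seq_cst seq_cst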
diff --git a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
index 99d55feb740e..1650185a9f07 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
@@ -10,17 +10,16 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X
-define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {
entry:
%0 = alloca [2 x i32], addrspace(5)
- %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
- store i32 0, i32 addrspace(5)* %1
- store i32 1, i32 addrspace(5)* %2
- %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
- %4 = load i32, i32 addrspace(5)* %3
- %5 = call i32 @llvm.r600.read.tidig.x()
- %6 = add i32 %4, %5
- store i32 %6, i32 addrspace(1)* %out
+ %1 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
+ store i32 0, ptr addrspace(5) %0
+ store i32 1, ptr addrspace(5) %1
+ %2 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in
+ %3 = load i32, ptr addrspace(5) %2
+ %4 = call i32 @llvm.r600.read.tidig.x()
+ %5 = add i32 %3, %4
+ store i32 %5, ptr addrspace(1) %out
ret void
}
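
Besides the type rewrite, work_item_info loses one instruction: a getelementptr whose indices are all zero returns its base pointer unchanged, so under opaque pointers the first store goes through the alloca directly and the remaining unnamed values are renumbered. Sketch of the folding (using the old value names from that test):

; old: %1 pointed at element 0, which is the same address as %0
%1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
store i32 0, i32 addrspace(5)* %1
; new: store through the alloca itself
store i32 0, ptr addrspace(5) %0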
diff --git a/llvm/test/CodeGen/AMDGPU/r600.sub.ll b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
index 2ded4f64328d..98eec2f08b37 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
@@ -3,48 +3,48 @@
declare i32 @llvm.r600.read.tidig.x() readnone
; FUNC-LABEL: {{^}}s_sub_i32:
-define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%result = sub i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}s_sub_imm_i32:
-define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @s_sub_imm_i32(ptr addrspace(1) %out, i32 %a) {
%result = sub i32 1234, %a
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
+define amdgpu_kernel void @test_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+ %a = load i32, ptr addrspace(1) %in
+ %b = load i32, ptr addrspace(1) %b_ptr
%result = sub i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_imm_i32:
; EG: SUB_INT
-define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %a = load i32, i32 addrspace(1)* %in
+define amdgpu_kernel void @test_sub_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %a = load i32, ptr addrspace(1) %in
%result = sub i32 123, %a
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_v2i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test_sub_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = sub <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -53,48 +53,48 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test_sub_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = sub <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_i16:
-define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
- %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
- %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
- %a = load volatile i16, i16 addrspace(1)* %gep
- %b = load volatile i16, i16 addrspace(1)* %b_ptr
+ %gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid
+ %b_ptr = getelementptr i16, ptr addrspace(1) %gep, i32 1
+ %a = load volatile i16, ptr addrspace(1) %gep
+ %b = load volatile i16, ptr addrspace(1) %b_ptr
%result = sub i16 %a, %b
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_v2i16:
-define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
- %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %gep = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %tid
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <2 x i16>, ptr addrspace(1) %gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = sub <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_sub_v4i16:
-define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
- %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
- %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep
- %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
+ %gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <4 x i16>, ptr addrspace(1) %gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = sub <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out
+ store <4 x i16> %result, ptr addrspace(1) %out
ret void
}
@@ -104,9 +104,9 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
-define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_sub_i64(ptr addrspace(1) noalias %out, i64 %a, i64 %b) nounwind {
%result = sub i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out, align 8
+ store i64 %result, ptr addrspace(1) %out, align 8
ret void
}
@@ -116,37 +116,37 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
-define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
+define amdgpu_kernel void @v_sub_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
- %a = load i64, i64 addrspace(1)* %a_ptr
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %tid
+ %b_ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %tid
+ %a = load i64, ptr addrspace(1) %a_ptr
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = sub i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out, align 8
+ store i64 %result, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_test_sub_v2i64:
-define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v2i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %tid
+ %a = load <2 x i64>, ptr addrspace(1) %a_ptr
+ %b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = sub <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_test_sub_v4i64:
-define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v4i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inA, i32 %tid
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inB, i32 %tid
+ %a = load <4 x i64>, ptr addrspace(1) %a_ptr
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = sub <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
index 03f0539e19be..323a84df4abc 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -3,59 +3,59 @@
; FUNC-LABEL: {{^}}tgid_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
-define amdgpu_kernel void @tgid_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_x(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}tgid_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T1.Y
-define amdgpu_kernel void @tgid_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_y(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}tgid_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T1.Z
-define amdgpu_kernel void @tgid_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_z(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}tidig_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
-define amdgpu_kernel void @tidig_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_x(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}tidig_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T0.Y
-define amdgpu_kernel void @tidig_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_y(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}tidig_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T0.Z
-define amdgpu_kernel void @tidig_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_z(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -65,28 +65,26 @@ entry:
; EG-NOT: VTX_READ
; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z
; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal
-define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
- %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
- %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
- %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
- %value = load i32, i32 addrspace(7)* %gep
- store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
+ %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
+ %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 4
+ %value = load i32, ptr addrspace(7) %gep
+ store i32 %value, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}test_implicit_dyn:
; 36 prepended implicit bytes + 8(out pointer + in) = 44
; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3
-define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
- %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
- %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
- %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
- %value = load i32, i32 addrspace(7)* %gep
- store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1 {
+ %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
+ %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 %in
+ %value = load i32, ptr addrspace(7) %gep
+ store i32 %value, ptr addrspace(1) %out
ret void
}
-declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0
+declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/r600cfg.ll b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
index 2996a1053da5..f2c7fcb38716 100644
--- a/llvm/test/CodeGen/AMDGPU/r600cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
@@ -83,7 +83,7 @@ ELSE45: ; preds = %ENDIF40
ENDIF43: ; preds = %ELSE45, %IF44
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
%52 = bitcast i32 %.sink to float
- %53 = load <4 x float>, <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, ptr addrspace(8) null
%54 = extractelement <4 x float> %53, i32 0
%55 = bitcast float %54 to i32
br label %LOOP47
diff --git a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
index 4ea503bf6098..cf2831f21fea 100644
--- a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
@@ -5,10 +5,10 @@
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 0(
-define amdgpu_kernel void @test_0(i32 %in0, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_0(i32 %in0, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -17,10 +17,10 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 1(
-define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -29,10 +29,10 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 2(
-define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
- store i32 %0, i32 addrspace(1)* %out
+ store i32 %0, ptr addrspace(1) %out
ret void
}
@@ -43,21 +43,21 @@ attributes #0 = { readnone }
!opencl.kernels = !{!0, !1, !2}
-!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50}
+!0 = !{ptr @test_0, !10, !20, !30, !40, !50}
!10 = !{!"kernel_arg_addr_space", i32 0, i32 1}
!20 = !{!"kernel_arg_access_qual", !"none", !"none"}
!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"}
!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"}
!50 = !{!"kernel_arg_type_qual", !"", !""}
-!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51}
+!1 = !{ptr @test_1, !11, !21, !31, !41, !51}
!11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1}
!21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"}
!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"}
!51 = !{!"kernel_arg_type_qual", !"", !"", !""}
-!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52}
+!2 = !{ptr @test_2, !12, !22, !32, !42, !52}
!12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1}
!22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}
diff --git a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
index 2cdfb06268e4..8495486aa491 100644
--- a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
@@ -33,9 +33,9 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
-define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -72,31 +72,31 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
;EG-LABEL: {{^}}test_sdiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = sdiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
;EG-LABEL: {{^}}test_srem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = srem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = sdiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -120,10 +120,10 @@ define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = srem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/set-dx10.ll b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
index 9a317a87540f..6bec5c87a36c 100644
--- a/llvm/test/CodeGen/AMDGPU/set-dx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
@@ -8,13 +8,13 @@
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -22,11 +22,11 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -34,13 +34,13 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -48,11 +48,11 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -60,13 +60,13 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -74,11 +74,11 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -86,13 +86,13 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -100,11 +100,11 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -112,13 +112,13 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -126,11 +126,11 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -138,13 +138,13 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -152,10 +152,10 @@ entry:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
index 7ac4e1d9fe4b..6eafce674f2a 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
@@ -11,10 +11,10 @@
; EG: LSHR {{\*?}} [[ADDR]]
; Works with the align 2 removed
-define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
- store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
+ store <2 x i32> %y, ptr addrspace(1) %out, align 2
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
index 7cf380520d42..0e7e9a0fbbc9 100644
--- a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
@@ -12,56 +12,56 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float>, <4 x float> addrspace(8)* null
+ %4 = load <4 x float>, ptr addrspace(8) null
%5 = extractelement <4 x float> %4, i32 1
- %6 = load <4 x float>, <4 x float> addrspace(8)* null
+ %6 = load <4 x float>, ptr addrspace(8) null
%7 = extractelement <4 x float> %6, i32 2
- %8 = load <4 x float>, <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, ptr addrspace(8) null
%9 = extractelement <4 x float> %8, i32 0
%10 = fmul float 0.000000e+00, %9
- %11 = load <4 x float>, <4 x float> addrspace(8)* null
+ %11 = load <4 x float>, ptr addrspace(8) null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %5, %12
- %14 = load <4 x float>, <4 x float> addrspace(8)* null
+ %14 = load <4 x float>, ptr addrspace(8) null
%15 = extractelement <4 x float> %14, i32 0
%16 = fmul float 0.000000e+00, %15
- %17 = load <4 x float>, <4 x float> addrspace(8)* null
+ %17 = load <4 x float>, ptr addrspace(8) null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float 0.000000e+00, %18
- %20 = load <4 x float>, <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, ptr addrspace(8) null
%21 = extractelement <4 x float> %20, i32 0
%22 = fmul float %7, %21
- %23 = load <4 x float>, <4 x float> addrspace(8)* null
+ %23 = load <4 x float>, ptr addrspace(8) null
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float 0.000000e+00, %24
- %26 = load <4 x float>, <4 x float> addrspace(8)* null
+ %26 = load <4 x float>, ptr addrspace(8) null
%27 = extractelement <4 x float> %26, i32 0
%28 = fmul float 0.000000e+00, %27
- %29 = load <4 x float>, <4 x float> addrspace(8)* null
+ %29 = load <4 x float>, ptr addrspace(8) null
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float 0.000000e+00, %30
- %32 = load <4 x float>, <4 x float> addrspace(8)* null
+ %32 = load <4 x float>, ptr addrspace(8) null
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float 0.000000e+00, %33
- %35 = load <4 x float>, <4 x float> addrspace(8)* null
+ %35 = load <4 x float>, ptr addrspace(8) null
%36 = extractelement <4 x float> %35, i32 0
%37 = fmul float 0.000000e+00, %36
- %38 = load <4 x float>, <4 x float> addrspace(8)* null
+ %38 = load <4 x float>, ptr addrspace(8) null
%39 = extractelement <4 x float> %38, i32 0
%40 = fmul float 1.000000e+00, %39
- %41 = load <4 x float>, <4 x float> addrspace(8)* null
+ %41 = load <4 x float>, ptr addrspace(8) null
%42 = extractelement <4 x float> %41, i32 0
%43 = fmul float 0.000000e+00, %42
- %44 = load <4 x float>, <4 x float> addrspace(8)* null
+ %44 = load <4 x float>, ptr addrspace(8) null
%45 = extractelement <4 x float> %44, i32 0
%46 = fmul float 0.000000e+00, %45
- %47 = load <4 x float>, <4 x float> addrspace(8)* null
+ %47 = load <4 x float>, ptr addrspace(8) null
%48 = extractelement <4 x float> %47, i32 0
%49 = fmul float 0.000000e+00, %48
- %50 = load <4 x float>, <4 x float> addrspace(8)* null
+ %50 = load <4 x float>, ptr addrspace(8) null
%51 = extractelement <4 x float> %50, i32 0
%52 = fmul float 0.000000e+00, %51
- %53 = load <4 x float>, <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, ptr addrspace(8) null
%54 = extractelement <4 x float> %53, i32 0
%55 = fmul float 1.000000e+00, %54
%56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = fadd float %0, 2.5
%3 = fmul float %1, 3.5
- %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 0
%6 = call float @llvm.cos.f32(float %5)
- %7 = load <4 x float>, <4 x float> addrspace(8)* null
+ %7 = load <4 x float>, ptr addrspace(8) null
%8 = extractelement <4 x float> %7, i32 0
- %9 = load <4 x float>, <4 x float> addrspace(8)* null
+ %9 = load <4 x float>, ptr addrspace(8) null
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %2, i32 0
%12 = insertelement <4 x float> %11, float %3, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
index 62cab48c65e9..4cf1e64a8bd5 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
@@ -33,9 +33,9 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
-define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -72,31 +72,31 @@ define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
;EG-LABEL: {{^}}test_udiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = udiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
;EG-LABEL: {{^}}test_urem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = urem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 41
%2 = lshr i64 %y, 41
%result = udiv i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -120,17 +120,17 @@ define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 41
%2 = lshr i64 %y, 41
%result = urem i64 %1, %2
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
;EG-LABEL: {{^}}test_udiv_k:
-define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @test_udiv_k(ptr addrspace(1) %out, i64 %x) {
%result = udiv i64 24, %x
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
index b7d766aa395e..90cc40fb6517 100644
--- a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
@@ -1,15 +1,15 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
; CHECK-LABEL: {{^}}kernel_arg_i64:
-define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
- store i64 %a, i64 addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind {
+ store i64 %a, ptr addrspace(1) %out, align 8
ret void
}
; i64 arg works, v1i64 arg does not.
; CHECK-LABEL: {{^}}kernel_arg_v1i64:
-define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
- store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_v1i64(ptr addrspace(1) %out, <1 x i64> %a) nounwind {
+ store <1 x i64> %a, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
index f002a1474e02..f6a642ecccd9 100644
--- a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
@@ -6,9 +6,9 @@
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %v = load i32, i32 addrspace(1)* %in
- store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %v = load i32, ptr addrspace(1) %in
+ store i32 %v, ptr addrspace(1) %out
ret void
}
@@ -16,9 +16,9 @@ define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)*
; EG: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
; CM: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x00,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %v = load <4 x i32>, <4 x i32> addrspace(1)* %in
- store <4 x i32> %v, <4 x i32> addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch128(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %v = load <4 x i32>, ptr addrspace(1) %in
+ store <4 x i32> %v, ptr addrspace(1) %out
ret void
}
@@ -26,9 +26,9 @@ define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32>
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace(7)* %in) {
- %v = load i32, i32 addrspace(7)* %in
- store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id3(ptr addrspace(1) %out, ptr addrspace(7) %in) {
+ %v = load i32, ptr addrspace(7) %in
+ store i32 %v, ptr addrspace(1) %out
ret void
}
@@ -38,9 +38,9 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace
@t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
-define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
- %a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
- %v = load i32, i32 addrspace(4)* %a
- store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id2(ptr addrspace(1) %out, i32 %in) {
+ %a = getelementptr inbounds [4 x i32], ptr addrspace(4) @t, i32 0, i32 %in
+ %v = load i32, ptr addrspace(4) %a
+ store i32 %v, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
index 88ef9fd93c8f..3067ac75340d 100644
--- a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
@@ -5,16 +5,16 @@
; TODO: enable doubles
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
-define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
- %val = load double, double addrspace(1)* %in, align 8
+define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load double, ptr addrspace(1) %in, align 8
%add = fadd double %val, 4.0
%bc = bitcast double %add to <2 x i32>
- store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %bc, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
-define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
+define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
@@ -25,12 +25,12 @@ if:
end:
%phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
- store <2 x double> %phi, <2 x double> addrspace(1)* %out
+ store <2 x double> %phi, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
-define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
+define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
@@ -41,6 +41,6 @@ if:
end:
%phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
- store <2 x i64> %phi, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %phi, ptr addrspace(1) %out
ret void
}
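
A note on the one non-mechanical change in this batch: in test_implicit and test_implicit_dyn above, the pointer-to-pointer bitcast does not just get retyped, it disappears, since under opaque pointers a bitcast that only reinterprets the pointee is a no-op and the getelementptr can index off the intrinsic's result directly. A minimal before/after sketch distilled from those hunks (the %bytes and %words names are illustrative, not taken from the tests):

  ; before: typed pointers force a bitcast to reinterpret i8* as i32*
  %bytes = call i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
  %words = bitcast i8 addrspace(7)* %bytes to i32 addrspace(7)*
  %gep   = getelementptr i32, i32 addrspace(7)* %words, i32 4

  ; after: ptr carries no pointee type, so the bitcast folds away and
  ; the load/store/GEP instructions name the value type themselves
  %bytes = call ptr addrspace(7) @llvm.r600.implicitarg.ptr()
  %gep   = getelementptr i32, ptr addrspace(7) %bytes, i32 4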