[llvm] da0293e - AMDGPU: Bulk update some r600 tests to opaque pointers

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 28 08:25:50 PST 2022


Author: Matt Arsenault
Date: 2022-11-28T11:25:44-05:00
New Revision: da0293e3cc66803217e9331d95a89ab6b59cdaae

URL: https://github.com/llvm/llvm-project/commit/da0293e3cc66803217e9331d95a89ab6b59cdaae
DIFF: https://github.com/llvm/llvm-project/commit/da0293e3cc66803217e9331d95a89ab6b59cdaae.diff

LOG: AMDGPU: Bulk update some r600 tests to opaque pointers

r600.amdgpu-alias-analysis.ll has something strange going on where
AliasAnalysisEvaluator's printing is reproducing the typed pointer
syntax.
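
For readers unfamiliar with the migration: the rewrite is purely
syntactic. Pointee types disappear from pointer types, while loads,
stores, and getelementptrs already name their value types explicitly,
so no information is lost. A minimal before/after pair mirroring the
bfi_def change below (@example is illustrative, not one of the
updated tests):

  ; Before: typed pointers
  define amdgpu_kernel void @example(i32 addrspace(1)* %out, i32 %v) {
    store i32 %v, i32 addrspace(1)* %out
    ret void
  }

  ; After: opaque pointers
  define amdgpu_kernel void @example(ptr addrspace(1) %out, i32 %v) {
    store i32 %v, ptr addrspace(1) %out
    ret void
  }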

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
    llvm/test/CodeGen/AMDGPU/big_alu.ll
    llvm/test/CodeGen/AMDGPU/elf.r600.ll
    llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
    llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
    llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
    llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
    llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
    llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
    llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
    llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
    llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
    llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
    llvm/test/CodeGen/AMDGPU/pv.ll
    llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
    llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
    llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
    llvm/test/CodeGen/AMDGPU/r600.add.ll
    llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
    llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
    llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
    llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
    llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
    llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
    llvm/test/CodeGen/AMDGPU/r600.sub.ll
    llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
    llvm/test/CodeGen/AMDGPU/r600cfg.ll
    llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
    llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
    llvm/test/CodeGen/AMDGPU/set-dx10.ll
    llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
    llvm/test/CodeGen/AMDGPU/swizzle-export.ll
    llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
    llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
    llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
    llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
index 34eb088b16f4..eb29e0ac8ec6 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
@@ -4,7 +4,7 @@
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
 ;
-define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_def(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_def:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -20,13 +20,13 @@ entry:
   %1 = and i32 %z, %0
   %2 = and i32 %y, %x
   %3 = or i32 %1, %2
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
 ; SHA-256 Ch function
 ; z ^ (x & (y ^ z))
-define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_sha256_ch:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -41,13 +41,13 @@ entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
   %2 = xor i32 %z, %1
-  store i32 %2, i32 addrspace(1)* %out
+  store i32 %2, ptr addrspace(1) %out
   ret void
 }
 
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_sha256_ma:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
@@ -64,7 +64,7 @@ entry:
   %1 = or i32 %x, %z
   %2 = and i32 %y, %1
   %3 = or i32 %0, %2
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -149,7 +149,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
   %and1 = and i64 %not.a, %mask
   %bitselect = or i64 %and0, %and1
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -176,7 +176,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
   %bitselect = xor i64 %and, %mask
 
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
   %bitselect = xor i64 %and, %mask
 
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -232,6 +232,6 @@ entry:
   %or1 = or i64 %and0, %and1
 
   %scalar.use = add i64 %or1, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 51387c8b79cb..999c56645f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -50,29 +50,29 @@ main_body:
   %tmp43 = extractelement <4 x float> %reg7, i32 1
   %tmp44 = extractelement <4 x float> %reg7, i32 2
   %tmp45 = extractelement <4 x float> %reg7, i32 3
-  %tmp46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp46 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp47 = extractelement <4 x float> %tmp46, i32 0
-  %tmp48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp49 = extractelement <4 x float> %tmp48, i32 1
-  %tmp50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp50 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp51 = extractelement <4 x float> %tmp50, i32 2
-  %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
   %tmp53 = extractelement <4 x float> %tmp52, i32 0
-  %tmp54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp54 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp55 = extractelement <4 x float> %tmp54, i32 0
-  %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp57 = extractelement <4 x float> %tmp56, i32 1
-  %tmp58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp58 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp59 = extractelement <4 x float> %tmp58, i32 2
-  %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp61 = extractelement <4 x float> %tmp60, i32 3
-  %tmp62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp62 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp63 = extractelement <4 x float> %tmp62, i32 0
-  %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp65 = extractelement <4 x float> %tmp64, i32 1
-  %tmp66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp66 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp67 = extractelement <4 x float> %tmp66, i32 2
-  %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp69 = extractelement <4 x float> %tmp68, i32 0
   %tmp70 = fcmp oge float %tmp69, 3.500000e+00
   %tmp71 = sext i1 %tmp70 to i32
@@ -80,7 +80,7 @@ main_body:
   %tmp73 = bitcast float %tmp72 to i32
   %tmp74 = icmp ne i32 %tmp73, 0
   %. = select i1 %tmp74, float 0.000000e+00, float 0.000000e+00
-  %tmp75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp75 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp76 = extractelement <4 x float> %tmp75, i32 0
   %tmp77 = fcmp oge float %tmp76, 2.000000e+00
   %tmp78 = sext i1 %tmp77 to i32
@@ -134,7 +134,7 @@ IF137:                                            ; preds = %main_body
   %tmp123 = insertelement <4 x float> %tmp122, float 0.000000e+00, i32 3
   %tmp124 = call float @llvm.r600.dot4(<4 x float> %tmp119, <4 x float> %tmp123)
   %tmp125 = fdiv float 1.000000e+00, %tmp124
-  %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %tmp127 = extractelement <4 x float> %tmp126, i32 0
   %tmp128 = fmul float %tmp127, %tmp125
   %tmp129 = fmul float %tmp103, %tmp128
@@ -360,15 +360,15 @@ ENDIF136:                                         ; preds = %ENDIF154, %main_bod
   %tmp333 = fmul float %tmp318, %tmp332
   %tmp334 = fmul float %tmp320, %tmp332
   %tmp335 = fmul float %tmp322, %tmp332
-  %tmp336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %tmp336 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %tmp337 = extractelement <4 x float> %tmp336, i32 0
   %tmp338 = fsub float -0.000000e+00, %tmp337
   %tmp339 = fadd float 1.000000e+00, %tmp338
-  %tmp340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %tmp340 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %tmp341 = extractelement <4 x float> %tmp340, i32 0
   %tmp342 = fsub float -0.000000e+00, %tmp341
   %tmp343 = fadd float 1.000000e+00, %tmp342
-  %tmp344 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp344 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp345 = extractelement <4 x float> %tmp344, i32 0
   %tmp346 = fsub float -0.000000e+00, %tmp345
   %tmp347 = fadd float 1.000000e+00, %tmp346
@@ -1136,7 +1136,7 @@ ENDIF175:                                         ; preds = %IF176, %ENDIF172
   %temp92.11 = phi float [ %tmp856, %IF176 ], [ %temp92.10, %ENDIF172 ]
   %temp93.5 = phi float [ %tmp857, %IF176 ], [ %temp93.4, %ENDIF172 ]
   %temp94.5 = phi float [ %tmp858, %IF176 ], [ %temp94.4, %ENDIF172 ]
-  %tmp859 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %tmp859 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
   %tmp860 = extractelement <4 x float> %tmp859, i32 0
   %tmp861 = fcmp olt float %tmp860, %tmp181
   %tmp862 = sext i1 %tmp861 to i32
@@ -1257,12 +1257,12 @@ ENDIF178:                                         ; preds = %IF179, %ENDIF175
   %tmp931 = fmul float %temp87.6, %tmp927
   %tmp932 = fmul float %tmp2, -2.000000e+00
   %tmp933 = fadd float %tmp932, 1.000000e+00
-  %tmp934 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %tmp934 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 23)
   %tmp935 = extractelement <4 x float> %tmp934, i32 2
   %tmp936 = fsub float -0.000000e+00, %tmp935
   %tmp937 = fadd float %tmp933, %tmp936
   %tmp938 = fdiv float 1.000000e+00, %tmp937
-  %tmp939 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %tmp939 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 24)
   %tmp940 = extractelement <4 x float> %tmp939, i32 2
   %tmp941 = fmul float %tmp940, %tmp938
   %tmp942 = fsub float -0.000000e+00, %tmp53

diff  --git a/llvm/test/CodeGen/AMDGPU/elf.r600.ll b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
index 276310724630..503d518d1ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/elf.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
@@ -9,9 +9,9 @@
 ; CONFIG-NEXT: .long   2
 ; CONFIG-NEXT: .long   165900
 ; CONFIG-NEXT: .long   0
-define amdgpu_kernel void @test(float addrspace(1)* %out, i32 %p) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %p) {
    %i = add i32 %p, 2
    %r = bitcast i32 %i to float
-   store float %r, float addrspace(1)* %out
+   store float %r, ptr addrspace(1) %out
    ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index 5cb0c616d15f..bb7257eb824a 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
 
 define amdgpu_ps void @fetch_limits_r600() {
 entry:
-  %tmp = load <4 x float>, <4 x float> addrspace(8)* null
-  %tmp1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %tmp3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %tmp4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %tmp6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %tmp7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp = load <4 x float>, ptr addrspace(8) null
+  %tmp1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+  %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+  %tmp3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+  %tmp4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+  %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+  %tmp6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+  %tmp7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+  %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp9 = shufflevector <4 x float> %tmp, <4 x float> %tmp, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %tmp10 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp11 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

diff  --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index d8f7c0daa8de..e0afdc8c6ba7 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
 
 define amdgpu_ps void @fetch_limits_r700() {
 entry:
-  %0 = load <4 x float>, <4 x float> addrspace(8)* null
-  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %0 = load <4 x float>, ptr addrspace(8) null
+  %1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+  %2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+  %3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+  %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+  %5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+  %6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+  %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+  %8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
+  %9 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
+  %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
+  %11 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
+  %12 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
+  %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 13)
+  %14 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
+  %15 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 15)
+  %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %17 = shufflevector <4 x float> %0, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %18 = call <4 x float> @llvm.r600.tex(<4 x float> %17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %19 = shufflevector <4 x float> %1, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

diff  --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
index 71bb4afa64ef..047e27be787e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -3,9 +3,9 @@
 ; EG-LABEL: {{^}}test_fmax_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float %b)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, flo
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
-  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
   %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
-  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
   %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
-  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  store <16 x float> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
 ; EG: 2143289344(nan)
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_var_immediate_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 * [[OUT]]
-define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_immediate_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float 2.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_var_literal_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float 99.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_literal_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float 99.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
index 713e95c7f46e..8d85af091f53 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -3,9 +3,9 @@
 ; EG-LABEL: {{^}}test_fmin_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
   %val = call float @llvm.minnum.f32(float %a, float %b)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, flo
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
-  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
   %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
-  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
   %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
-  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  store <16 x float> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
 ; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_var_immediate_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float %a, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_immediate_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float 2.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_var_literal_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float %a, float 99.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_literal_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float 99.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
index b5a0de95acf5..9781e3fc828f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -7,15 +7,15 @@
 ; CHECK: CUBE * T{{[0-9]}}.W
 define amdgpu_ps void @cube() {
 main_body:
-  %tmp = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp1 = extractelement <4 x float> %tmp, i32 3
-  %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp3 = extractelement <4 x float> %tmp2, i32 0
   %tmp4 = fdiv float %tmp3, %tmp1
-  %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp6 = extractelement <4 x float> %tmp5, i32 1
   %tmp7 = fdiv float %tmp6, %tmp1
-  %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp9 = extractelement <4 x float> %tmp8, i32 2
   %tmp10 = fdiv float %tmp9, %tmp1
   %tmp11 = insertelement <4 x float> undef, float %tmp4, i32 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
index de8a47741c94..21c8ea6c281f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
@@ -2,10 +2,10 @@
 
 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
 
-define amdgpu_kernel void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
-  %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
-  %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+define amdgpu_kernel void @test_dp4(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
+  %src0 = load <4 x float>, ptr addrspace(1) %a, align 16
+  %src1 = load <4 x float>, ptr addrspace(1) %b, align 16
   %dp4 = call float @llvm.r600.dot4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
-  store float %dp4, float addrspace(1)* %out, align 4
+  store float %dp4, ptr addrspace(1) %out, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
index 93caafbb9524..7859bac7bc52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
@@ -2,18 +2,18 @@
 
 ; EG-LABEL: {{^}}test_group_barrier:
 ; EG: GROUP_BARRIER
-define amdgpu_kernel void @test_group_barrier(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_group_barrier(ptr addrspace(1) %out) #0 {
 entry:
   %tmp = call i32 @llvm.r600.read.tidig.x()
-  %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
-  store i32 %tmp, i32 addrspace(1)* %tmp1
+  %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
+  store i32 %tmp, ptr addrspace(1) %tmp1
   call void @llvm.r600.group.barrier()
   %tmp2 = call i32 @llvm.r600.read.local.size.x()
   %tmp3 = sub i32 %tmp2, 1
   %tmp4 = sub i32 %tmp3, %tmp
-  %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4
-  %tmp6 = load i32, i32 addrspace(1)* %tmp5
-  store i32 %tmp6, i32 addrspace(1)* %tmp1
+  %tmp5 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp4
+  %tmp6 = load i32, ptr addrspace(1) %tmp5
+  store i32 %tmp6, ptr addrspace(1) %tmp1
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
index 7f18fec3db6d..64f2fd26f1df 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -14,10 +14,10 @@
 
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.x() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,10 +29,10 @@ entry:
 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.y() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -44,10 +44,10 @@ entry:
 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.z() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -57,12 +57,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[X]], s[[Y]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %val = mul i32 %x, %y
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -76,12 +76,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], [[X]], [[Z]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %val = mul i32 %x, %z
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -94,12 +94,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[#LOAD + 0]], s[[#LOAD + 1]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
 entry:
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %val = mul i32 %y, %z
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -115,14 +115,14 @@ entry:
 ; GCN: s_add_i32 [[VAL:s[0-9]+]], [[M]], s[[Z]]
 ; GCN-DAG: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %xy = mul i32 %x, %y
   %xyz = add i32 %xy, %z
-  store i32 %xyz, i32 addrspace(1)* %out
+  store i32 %xyz, ptr addrspace(1) %out
   ret void
 }
 
@@ -132,12 +132,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.x() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 
@@ -147,12 +147,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.y() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 
@@ -162,12 +162,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.z() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
index 90d076d4fb4d..f2c25c79c780 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
@@ -4,8 +4,8 @@ declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
 
 ; EG-LABEL: {{^}}rsq_clamped_f32:
 ; EG: RECIPSQRT_CLAMPED
-define amdgpu_kernel void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @rsq_clamped_f32(ptr addrspace(1) %out, float %src) nounwind {
   %rsq_clamped = call float @llvm.r600.recipsqrt.clamped.f32(float %src)
-  store float %rsq_clamped, float addrspace(1)* %out, align 4
+  store float %rsq_clamped, ptr addrspace(1) %out, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
index d9177b39b8ac..004d348dfbd2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
@@ -4,25 +4,25 @@ declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone
 
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32:
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32(ptr addrspace(1) %out, float %src) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float %src) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; TODO: Really these should be constant folded
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_4.0
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 4.0) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_100.0
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 100.0) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
index 98044917e2b0..7ec4072f2622 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
@@ -17,9 +17,9 @@
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
 
-define amdgpu_kernel void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 bb:
-  %addr = load <4 x float>, <4 x float> addrspace(1)* %in
+  %addr = load <4 x float>, ptr addrspace(1) %in
   %tmp = shufflevector <4 x float> %addr, <4 x float> %addr, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %tmp1 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -52,7 +52,7 @@ bb:
   %tmp29 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1)
-  store <4 x float> %tmp31, <4 x float> addrspace(1)* %out
+  store <4 x float> %tmp31, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
index c8f4e4c986a7..b0c8cc2810ff 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
@@ -15,19 +15,19 @@
 ; EG-NEXT: ALU clause
 
 ; EG: LDS_READ_RET
-define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
 entry:
   %y.i = call i32 @llvm.r600.read.tidig.x() #1
-  %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
-  store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %y.i
+  store i32 %y.i, ptr addrspace(3) %arrayidx, align 4
   %add = add nsw i32 %y.i, 1
   %cmp = icmp eq i32 %add, 16
   %.add = select i1 %cmp, i32 0, i32 %add
   call void @llvm.r600.group.barrier()
-  %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
-  %tmp = load i32, i32 addrspace(3)* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
-  store i32 %tmp, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx1 = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %.add
+  %tmp = load i32, ptr addrspace(3) %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %y.i
+  store i32 %tmp, ptr addrspace(1) %arrayidx2, align 4
   ret void
 }
 
@@ -57,25 +57,25 @@ entry:
 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
 
-define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) #0 {
 entry:
   %x.i = call i32 @llvm.r600.read.tidig.x() #1
-  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
-  store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+  store i32 %x.i, ptr addrspace(3) %arrayidx, align 4
   %mul = shl nsw i32 %x.i, 1
-  %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
-  store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+  store i32 %mul, ptr addrspace(3) %arrayidx1, align 4
   %sub = sub nsw i32 3, %x.i
   call void @llvm.r600.group.barrier()
-  %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
-  %tmp = load i32, i32 addrspace(3)* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
-  store i32 %tmp, i32 addrspace(1)* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
-  %tmp1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
+  %arrayidx2 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+  %tmp = load i32, ptr addrspace(3) %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %x.i
+  store i32 %tmp, ptr addrspace(1) %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+  %tmp1 = load i32, ptr addrspace(3) %arrayidx4, align 4
   %add = add nsw i32 %x.i, 4
-  %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
-  store i32 %tmp1, i32 addrspace(1)* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %add
+  store i32 %tmp1, ptr addrspace(1) %arrayidx5, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
index 5bb21cb75952..98a7bee2c6d6 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=CM %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s
 
-define amdgpu_kernel void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; CM-LABEL: test_umul24_i32:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -38,12 +38,12 @@ entry:
   %1 = shl i32 %b, 8
   %b_24 = lshr i32 %1, 8
   %2 = mul i32 %a_24, %b_24
-  store i32 %2, i32 addrspace(1)* %out
+  store i32 %2, ptr addrspace(1) %out
   ret void
 }
 
 ; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16_sext(ptr addrspace(1) %out, i16 %a, i16 %b) {
 ; CM-LABEL: test_umul24_i16_sext:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
@@ -88,12 +88,12 @@ define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a,
 entry:
   %mul = mul i16 %a, %b
   %ext = sext i16 %mul to i32
-  store i32 %ext, i32 addrspace(1)* %out
+  store i32 %ext, ptr addrspace(1) %out
   ret void
 }
 
 ; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
+define amdgpu_kernel void @test_umul24_i8(ptr addrspace(1) %out, i8 %a, i8 %b) {
 ; CM-LABEL: test_umul24_i8:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
@@ -138,11 +138,11 @@ define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b)
 entry:
   %mul = mul i8 %a, %b
   %ext = sext i8 %mul to i32
-  store i32 %ext, i32 addrspace(1)* %out
+  store i32 %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi24_i32_i64(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; CM-LABEL: test_umulhi24_i32_i64:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -175,11 +175,11 @@ entry:
   %mul48 = mul i64 %a.24.i64, %b.24.i64
   %mul48.hi = lshr i64 %mul48, 32
   %mul24hi = trunc i64 %mul48.hi to i32
-  store i32 %mul24hi, i32 addrspace(1)* %out
+  store i32 %mul24hi, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umulhi24(ptr addrspace(1) %out, i64 %a, i64 %b) {
 ; CM-LABEL: test_umulhi24:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -210,12 +210,12 @@ entry:
   %mul48 = mul i64 %a.24, %b.24
   %mul48.hi = lshr i64 %mul48, 32
   %mul24.hi = trunc i64 %mul48.hi to i32
-  store i32 %mul24.hi, i32 addrspace(1)* %out
+  store i32 %mul24.hi, ptr addrspace(1) %out
   ret void
 }
 
 ; Multiply with 24-bit inputs and 64-bit output.
-define amdgpu_kernel void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umul24_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
 ; CM-LABEL: test_umul24_i64:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
@@ -257,6 +257,6 @@ entry:
   %tmp1 = shl i64 %b, 40
   %b_24 = lshr i64 %tmp1, 40
   %tmp2 = mul i64 %a_24, %b_24
-  store i64 %tmp2, i64 addrspace(1)* %out
+  store i64 %tmp2, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
index 4685e54d07f9..5af9f78b471c 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
@@ -22,35 +22,35 @@ entry:
   %c1 = alloca i32, align 4, addrspace(5)
   %d1 = alloca i32, align 4, addrspace(5)
   %data = alloca i32, align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(5)* %a0, align 4
-  %1 = load i32, i32 addrspace(5)* %b0, align 4
+  %0 = load i32, ptr addrspace(5) %a0, align 4
+  %1 = load i32, ptr addrspace(5) %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32, i32 addrspace(5)* %c0, align 4
-  %3 = load i32, i32 addrspace(5)* %d0, align 4
+  %2 = load i32, ptr addrspace(5) %c0, align 4
+  %3 = load i32, ptr addrspace(5) %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %land.lhs.true
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %land.lhs.true, %entry
-  %4 = load i32, i32 addrspace(5)* %a1, align 4
-  %5 = load i32, i32 addrspace(5)* %b1, align 4
+  %4 = load i32, ptr addrspace(5) %a1, align 4
+  %5 = load i32, ptr addrspace(5) %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.end6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32, i32 addrspace(5)* %c1, align 4
-  %7 = load i32, i32 addrspace(5)* %d1, align 4
+  %6 = load i32, ptr addrspace(5) %c1, align 4
+  %7 = load i32, ptr addrspace(5) %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.end6
 
 if.then5:                                         ; preds = %land.lhs.true3
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end6
 
 if.end6:                                          ; preds = %if.then5, %land.lhs.true3, %if.end

diff --git a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
index 1d55c9ee074a..fc71e161ce69 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
@@ -23,14 +23,14 @@ entry:
   %c1 = alloca i32, align 4, addrspace(5)
   %d1 = alloca i32, align 4, addrspace(5)
   %data = alloca i32, align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(5)* %a0, align 4
-  %1 = load i32, i32 addrspace(5)* %b0, align 4
+  %0 = load i32, ptr addrspace(5) %a0, align 4
+  %1 = load i32, ptr addrspace(5) %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.else
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32, i32 addrspace(5)* %c0, align 4
-  %3 = load i32, i32 addrspace(5)* %d0, align 4
+  %2 = load i32, ptr addrspace(5) %c0, align 4
+  %3 = load i32, ptr addrspace(5) %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -38,18 +38,18 @@ if.then:                                          ; preds = %land.lhs.true
   br label %if.end
 
 if.else:                                          ; preds = %land.lhs.true, %entry
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %4 = load i32, i32 addrspace(5)* %a1, align 4
-  %5 = load i32, i32 addrspace(5)* %b1, align 4
+  %4 = load i32, ptr addrspace(5) %a1, align 4
+  %5 = load i32, ptr addrspace(5) %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.else6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32, i32 addrspace(5)* %c1, align 4
-  %7 = load i32, i32 addrspace(5)* %d1, align 4
+  %6 = load i32, ptr addrspace(5) %c1, align 4
+  %7 = load i32, ptr addrspace(5) %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.else6
 
@@ -57,7 +57,7 @@ if.then5:                                         ; preds = %land.lhs.true3
   br label %if.end7
 
 if.else6:                                         ; preds = %land.lhs.true3, %if.end
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end7
 
 if.end7:                                          ; preds = %if.else6, %if.then5

diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 6c8f308bd65e..81e8dfcdb3a4 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -18,23 +18,22 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 ; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1
 ; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1
 
-define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
 entry:
   %stack = alloca [5 x i32], align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(1)* %in, align 4
-  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
-  store i32 4, i32 addrspace(5)* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
-  store i32 5, i32 addrspace(5)* %arrayidx3, align 4
-  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
-  %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
-  store i32 %2, i32 addrspace(1)* %out, align 4
-  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
-  %3 = load i32, i32 addrspace(5)* %arrayidx12
-  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
-  store i32 %3, i32 addrspace(1)* %arrayidx13
+  %0 = load i32, ptr addrspace(1) %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %0
+  store i32 4, ptr addrspace(5) %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1
+  %1 = load i32, ptr addrspace(1) %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %1
+  store i32 5, ptr addrspace(5) %arrayidx3, align 4
+  %2 = load i32, ptr addrspace(5) %stack, align 4
+  store i32 %2, ptr addrspace(1) %out, align 4
+  %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 1
+  %3 = load i32, ptr addrspace(5) %arrayidx12
+  %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1
+  store i32 %3, ptr addrspace(1) %arrayidx13
   ret void
 }
 
@@ -49,24 +48,20 @@ entry:
 ; R600-NOT: MOVA_INT
 %struct.point = type { i32, i32 }
 
-define amdgpu_kernel void @multiple_structs(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 {
 entry:
   %a = alloca %struct.point, addrspace(5)
   %b = alloca %struct.point, addrspace(5)
-  %a.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
-  %a.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1
-  %b.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
-  %b.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1
-  store i32 0, i32 addrspace(5)* %a.x.ptr
-  store i32 1, i32 addrspace(5)* %a.y.ptr
-  store i32 2, i32 addrspace(5)* %b.x.ptr
-  store i32 3, i32 addrspace(5)* %b.y.ptr
-  %a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0
-  %b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0
-  %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr
-  %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr
+  %a.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %a, i32 0, i32 1
+  %b.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %b, i32 0, i32 1
+  store i32 0, ptr addrspace(5) %a
+  store i32 1, ptr addrspace(5) %a.y.ptr
+  store i32 2, ptr addrspace(5) %b
+  store i32 3, ptr addrspace(5) %b.y.ptr
+  %a.indirect = load i32, ptr addrspace(5) %a
+  %b.indirect = load i32, ptr addrspace(5) %b
   %0 = add i32 %a.indirect, %b.indirect
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -77,69 +72,63 @@ entry:
 ; FUNC-LABEL: {{^}}direct_loop:
 ; R600-NOT: MOVA_INT
 
-define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
   %prv_array_const = alloca [2 x i32], addrspace(5)
   %prv_array = alloca [2 x i32], addrspace(5)
-  %a = load i32, i32 addrspace(1)* %in
-  %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %b = load i32, i32 addrspace(1)* %b_src_ptr
-  %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
-  store i32 %a, i32 addrspace(5)* %a_dst_ptr
-  %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1
-  store i32 %b, i32 addrspace(5)* %b_dst_ptr
+  %a = load i32, ptr addrspace(1) %in
+  %b_src_ptr = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1
+  %b = load i32, ptr addrspace(1) %b_src_ptr
+  store i32 %a, ptr addrspace(5) %prv_array_const
+  %b_dst_ptr = getelementptr inbounds [2 x i32], ptr addrspace(5) %prv_array_const, i32 0, i32 1
+  store i32 %b, ptr addrspace(5) %b_dst_ptr
   br label %for.body
 
 for.body:
   %inc = phi i32 [0, %entry], [%count, %for.body]
-  %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0
-  %x = load i32, i32 addrspace(5)* %x_ptr
-  %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
-  %y = load i32, i32 addrspace(5)* %y_ptr
+  %x = load i32, ptr addrspace(5) %prv_array_const
+  %y = load i32, ptr addrspace(5) %prv_array
   %xy = add i32 %x, %y
-  store i32 %xy, i32 addrspace(5)* %y_ptr
+  store i32 %xy, ptr addrspace(5) %prv_array
   %count = add i32 %inc, 1
   %done = icmp eq i32 %count, 4095
   br i1 %done, label %for.end, label %for.body
 
 for.end:
-  %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0
-  %value = load i32, i32 addrspace(5)* %value_ptr
-  store i32 %value, i32 addrspace(1)* %out
+  %value = load i32, ptr addrspace(5) %prv_array
+  store i32 %value, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}short_array:
 
 ; R600-VECT: MOVA_INT
-define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %0 = alloca [2 x i16], addrspace(5)
-  %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0
-  %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1
-  store i16 0, i16 addrspace(5)* %1
-  store i16 1, i16 addrspace(5)* %2
-  %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index
-  %4 = load i16, i16 addrspace(5)* %3
-  %5 = sext i16 %4 to i32
-  store i32 %5, i32 addrspace(1)* %out
+  %1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1
+  store i16 0, ptr addrspace(5) %0
+  store i16 1, ptr addrspace(5) %1
+  %2 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 %index
+  %3 = load i16, ptr addrspace(5) %2
+  %4 = sext i16 %3 to i32
+  store i32 %4, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}char_array:
 
 ; R600-VECT: MOVA_INT
-define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %0 = alloca [2 x i8], addrspace(5)
-  %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0
-  %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1
-  store i8 0, i8 addrspace(5)* %1
-  store i8 1, i8 addrspace(5)* %2
-  %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index
-  %4 = load i8, i8 addrspace(5)* %3
-  %5 = sext i8 %4 to i32
-  store i32 %5, i32 addrspace(1)* %out
+  %1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1
+  store i8 0, ptr addrspace(5) %0
+  store i8 1, ptr addrspace(5) %1
+  %2 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 %index
+  %3 = load i8, ptr addrspace(5) %2
+  %4 = sext i8 %3 to i32
+  store i32 %4, ptr addrspace(1) %out
   ret void
 
 }
@@ -150,127 +139,120 @@ entry:
 ; R600-NOT: MOV T0.X
 ; Additional check in case the move ends up in the last slot
 ; R600-NOT: MOV * T0.X
-define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) #0 {
 entry:
   %0 = alloca [2 x i32], addrspace(5)
-  %1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
-  %2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
-  store i32 0, i32 addrspace(5)* %1
-  store i32 1, i32 addrspace(5)* %2
-  %3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
-  %4 = load i32, i32 addrspace(5)* %3
-  %5 = call i32 @llvm.r600.read.tidig.x()
-  %6 = add i32 %4, %5
-  store i32 %6, i32 addrspace(1)* %out
+  %1 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
+  store i32 0, ptr addrspace(5) %0
+  store i32 1, ptr addrspace(5) %1
+  %2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in
+  %3 = load i32, ptr addrspace(5) %2
+  %4 = call i32 @llvm.r600.read.tidig.x()
+  %5 = add i32 %3, %4
+  store i32 %5, ptr addrspace(1) %out
   ret void
 }
 
 ; Test that two stack objects are not stored in the same register
 ; The second stack object should be in T3.X
 ; FUNC-LABEL: {{^}}no_overlap:
-define amdgpu_kernel void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 {
 entry:
   %0 = alloca [3 x i8], align 1, addrspace(5)
   %1 = alloca [2 x i8], align 1, addrspace(5)
-  %2 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0
-  %3 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1
-  %4 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2
-  %5 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0
-  %6 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1
-  store i8 0, i8 addrspace(5)* %2
-  store i8 1, i8 addrspace(5)* %3
-  store i8 2, i8 addrspace(5)* %4
-  store i8 1, i8 addrspace(5)* %5
-  store i8 0, i8 addrspace(5)* %6
-  %7 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in
-  %8 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in
-  %9 = load i8, i8 addrspace(5)* %7
-  %10 = load i8, i8 addrspace(5)* %8
-  %11 = add i8 %9, %10
-  %12 = sext i8 %11 to i32
-  store i32 %12, i32 addrspace(1)* %out
+  %2 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 1
+  %3 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 2
+  %4 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 1
+  store i8 0, ptr addrspace(5) %0
+  store i8 1, ptr addrspace(5) %2
+  store i8 2, ptr addrspace(5) %3
+  store i8 1, ptr addrspace(5) %1
+  store i8 0, ptr addrspace(5) %4
+  %5 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 %in
+  %6 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 %in
+  %7 = load i8, ptr addrspace(5) %5
+  %8 = load i8, ptr addrspace(5) %6
+  %9 = add i8 %7, %8
+  %10 = sext i8 %9 to i32
+  store i32 %10, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %alloca = alloca [2 x [2 x i8]], addrspace(5)
-  %gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
-  %gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
-  store i8 0, i8 addrspace(5)* %gep0
-  store i8 1, i8 addrspace(5)* %gep1
-  %gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
-  %load = load i8, i8 addrspace(5)* %gep2
+  %gep1 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+  store i8 0, ptr addrspace(5) %alloca
+  store i8 1, ptr addrspace(5) %gep1
+  %gep2 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+  %load = load i8, ptr addrspace(5) %gep2
   %sext = sext i8 %load to i32
-  store i32 %sext, i32 addrspace(1)* %out
+  store i32 %sext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %alloca = alloca [2 x [2 x i32]], addrspace(5)
-  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
-  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
-  store i32 0, i32 addrspace(5)* %gep0
-  store i32 1, i32 addrspace(5)* %gep1
-  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
-  %load = load i32, i32 addrspace(5)* %gep2
-  store i32 %load, i32 addrspace(1)* %out
+  %gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+  store i32 0, ptr addrspace(5) %alloca
+  store i32 1, ptr addrspace(5) %gep1
+  %gep2 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+  %load = load i32, ptr addrspace(5) %gep2
+  store i32 %load, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %alloca = alloca [2 x [2 x i64]], addrspace(5)
-  %gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0
-  %gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1
-  store i64 0, i64 addrspace(5)* %gep0
-  store i64 1, i64 addrspace(5)* %gep1
-  %gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index
-  %load = load i64, i64 addrspace(5)* %gep2
-  store i64 %load, i64 addrspace(1)* %out
+  %gep1 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+  store i64 0, ptr addrspace(5) %alloca
+  store i64 1, ptr addrspace(5) %gep1
+  %gep2 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index
+  %load = load i64, ptr addrspace(5) %gep2
+  store i64 %load, ptr addrspace(1) %out
   ret void
 }
 
 %struct.pair32 = type { i32, i32 }
 
-define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5)
-  %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1
-  %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1
-  store i32 0, i32 addrspace(5)* %gep0
-  store i32 1, i32 addrspace(5)* %gep1
-  %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0
-  %load = load i32, i32 addrspace(5)* %gep2
-  store i32 %load, i32 addrspace(1)* %out
+  %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1
+  %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1, i32 1
+  store i32 0, ptr addrspace(5) %gep0
+  store i32 1, ptr addrspace(5) %gep1
+  %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index, i32 0
+  %load = load i32, ptr addrspace(5) %gep2
+  store i32 %load, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
+define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 {
 entry:
   %alloca = alloca [2 x %struct.pair32], addrspace(5)
-  %gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1
-  %gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0
-  store i32 0, i32 addrspace(5)* %gep0
-  store i32 1, i32 addrspace(5)* %gep1
-  %gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 %index, i32 0
-  %load = load i32, i32 addrspace(5)* %gep2
-  store i32 %load, i32 addrspace(1)* %out
+  %gep0 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1
+  %gep1 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0
+  store i32 0, ptr addrspace(5) %gep0
+  store i32 1, ptr addrspace(5) %gep1
+  %gep2 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 %index, i32 0
+  %load = load i32, ptr addrspace(5) %gep2
+  store i32 %load, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+define amdgpu_kernel void @select_private(ptr addrspace(1) %out, i32 %in) nounwind {
 entry:
   %tmp = alloca [2 x i32], addrspace(5)
-  %tmp1 = getelementptr inbounds  [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
-  %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
-  store i32 0, i32 addrspace(5)* %tmp1
-  store i32 1, i32 addrspace(5)* %tmp2
+  %tmp2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
+  store i32 0, ptr addrspace(5) %tmp
+  store i32 1, ptr addrspace(5) %tmp2
   %cmp = icmp eq i32 %in, 0
-  %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2
-  %load = load i32, i32 addrspace(5)* %sel
-  store i32 %load, i32 addrspace(1)* %out
+  %sel = select i1 %cmp, ptr addrspace(5) %tmp, ptr addrspace(5) %tmp2
+  %load = load i32, ptr addrspace(5) %sel
+  store i32 %load, ptr addrspace(1) %out
   ret void
 }
 
@@ -281,16 +263,16 @@ entry:
 ; SI-NOT: ds_write
 ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
 ; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
-define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
   %alloca = alloca [16 x i32], addrspace(5)
-  %tmp0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
-  store i32 5, i32 addrspace(5)* %tmp0
-  %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32
+  %tmp0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a
+  store i32 5, ptr addrspace(5) %tmp0
+  %tmp1 = ptrtoint ptr addrspace(5) %alloca to i32
   %tmp2 = add i32 %tmp1, 5
-  %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)*
-  %tmp4 = getelementptr inbounds i32, i32 addrspace(5)* %tmp3, i32 %b
-  %tmp5 = load i32, i32 addrspace(5)* %tmp4
-  store i32 %tmp5, i32 addrspace(1)* %out
+  %tmp3 = inttoptr i32 %tmp2 to ptr addrspace(5)
+  %tmp4 = getelementptr inbounds i32, ptr addrspace(5) %tmp3, i32 %b
+  %tmp5 = load i32, ptr addrspace(5) %tmp4
+  store i32 %tmp5, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll
index 1474dbabba69..ed24e2f97c2c 100644
--- a/llvm/test/CodeGen/AMDGPU/pv.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv.ll
@@ -32,63 +32,63 @@ main_body:
   %tmp37 = extractelement <4 x float> %reg7, i32 1
   %tmp38 = extractelement <4 x float> %reg7, i32 2
   %tmp39 = extractelement <4 x float> %reg7, i32 3
-  %tmp40 = load <4 x float>, <4 x float> addrspace(8)* null
+  %tmp40 = load <4 x float>, ptr addrspace(8) null
   %tmp41 = extractelement <4 x float> %tmp40, i32 0
   %tmp42 = fmul float %tmp, %tmp41
-  %tmp43 = load <4 x float>, <4 x float> addrspace(8)* null
+  %tmp43 = load <4 x float>, ptr addrspace(8) null
   %tmp44 = extractelement <4 x float> %tmp43, i32 1
   %tmp45 = fmul float %tmp, %tmp44
-  %tmp46 = load <4 x float>, <4 x float> addrspace(8)* null
+  %tmp46 = load <4 x float>, ptr addrspace(8) null
   %tmp47 = extractelement <4 x float> %tmp46, i32 2
   %tmp48 = fmul float %tmp, %tmp47
-  %tmp49 = load <4 x float>, <4 x float> addrspace(8)* null
+  %tmp49 = load <4 x float>, ptr addrspace(8) null
   %tmp50 = extractelement <4 x float> %tmp49, i32 3
   %tmp51 = fmul float %tmp, %tmp50
-  %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %tmp53 = extractelement <4 x float> %tmp52, i32 0
   %tmp54 = fmul float %tmp13, %tmp53
   %tmp55 = fadd float %tmp54, %tmp42
-  %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %tmp57 = extractelement <4 x float> %tmp56, i32 1
   %tmp58 = fmul float %tmp13, %tmp57
   %tmp59 = fadd float %tmp58, %tmp45
-  %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %tmp61 = extractelement <4 x float> %tmp60, i32 2
   %tmp62 = fmul float %tmp13, %tmp61
   %tmp63 = fadd float %tmp62, %tmp48
-  %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %tmp65 = extractelement <4 x float> %tmp64, i32 3
   %tmp66 = fmul float %tmp13, %tmp65
   %tmp67 = fadd float %tmp66, %tmp51
-  %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %tmp69 = extractelement <4 x float> %tmp68, i32 0
   %tmp70 = fmul float %tmp14, %tmp69
   %tmp71 = fadd float %tmp70, %tmp55
-  %tmp72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %tmp72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %tmp73 = extractelement <4 x float> %tmp72, i32 1
   %tmp74 = fmul float %tmp14, %tmp73
   %tmp75 = fadd float %tmp74, %tmp59
-  %tmp76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %tmp76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %tmp77 = extractelement <4 x float> %tmp76, i32 2
   %tmp78 = fmul float %tmp14, %tmp77
   %tmp79 = fadd float %tmp78, %tmp63
-  %tmp80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %tmp80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %tmp81 = extractelement <4 x float> %tmp80, i32 3
   %tmp82 = fmul float %tmp14, %tmp81
   %tmp83 = fadd float %tmp82, %tmp67
-  %tmp84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %tmp84 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %tmp85 = extractelement <4 x float> %tmp84, i32 0
   %tmp86 = fmul float %tmp15, %tmp85
   %tmp87 = fadd float %tmp86, %tmp71
-  %tmp88 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %tmp88 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %tmp89 = extractelement <4 x float> %tmp88, i32 1
   %tmp90 = fmul float %tmp15, %tmp89
   %tmp91 = fadd float %tmp90, %tmp75
-  %tmp92 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %tmp92 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %tmp93 = extractelement <4 x float> %tmp92, i32 2
   %tmp94 = fmul float %tmp15, %tmp93
   %tmp95 = fadd float %tmp94, %tmp79
-  %tmp96 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %tmp96 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %tmp97 = extractelement <4 x float> %tmp96, i32 3
   %tmp98 = fmul float %tmp15, %tmp97
   %tmp99 = fadd float %tmp98, %tmp83
@@ -106,15 +106,15 @@ main_body:
   %tmp111 = fmul float %tmp16, %tmp110
   %tmp112 = fmul float %tmp17, %tmp110
   %tmp113 = fmul float %tmp18, %tmp110
-  %tmp114 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %tmp114 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %tmp115 = extractelement <4 x float> %tmp114, i32 0
   %tmp116 = fmul float %tmp115, %tmp20
   %tmp117 = fadd float %tmp116, %tmp32
-  %tmp118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %tmp118 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %tmp119 = extractelement <4 x float> %tmp118, i32 1
   %tmp120 = fmul float %tmp119, %tmp21
   %tmp121 = fadd float %tmp120, %tmp33
-  %tmp122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %tmp122 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %tmp123 = extractelement <4 x float> %tmp122, i32 2
   %tmp124 = fmul float %tmp123, %tmp22
   %tmp125 = fadd float %tmp124, %tmp34
@@ -126,11 +126,11 @@ main_body:
   %clamp.i10 = call float @llvm.minnum.f32(float %max.0.i9, float 1.000000e+00)
   %max.0.i7 = call float @llvm.maxnum.f32(float %tmp27, float 0.000000e+00)
   %clamp.i8 = call float @llvm.minnum.f32(float %max.0.i7, float 1.000000e+00)
-  %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %tmp127 = extractelement <4 x float> %tmp126, i32 0
-  %tmp128 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %tmp128 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %tmp129 = extractelement <4 x float> %tmp128, i32 1
-  %tmp130 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %tmp130 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %tmp131 = extractelement <4 x float> %tmp130, i32 2
   %tmp132 = insertelement <4 x float> undef, float %tmp111, i32 0
   %tmp133 = insertelement <4 x float> %tmp132, float %tmp112, i32 1
@@ -141,11 +141,11 @@ main_body:
   %tmp138 = insertelement <4 x float> %tmp137, float %tmp131, i32 2
   %tmp139 = insertelement <4 x float> %tmp138, float 0.000000e+00, i32 3
   %tmp140 = call float @llvm.r600.dot4(<4 x float> %tmp135, <4 x float> %tmp139)
-  %tmp141 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %tmp141 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %tmp142 = extractelement <4 x float> %tmp141, i32 0
-  %tmp143 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %tmp143 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %tmp144 = extractelement <4 x float> %tmp143, i32 1
-  %tmp145 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %tmp145 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %tmp146 = extractelement <4 x float> %tmp145, i32 2
   %tmp147 = insertelement <4 x float> undef, float %tmp111, i32 0
   %tmp148 = insertelement <4 x float> %tmp147, float %tmp112, i32 1
@@ -156,31 +156,31 @@ main_body:
   %tmp153 = insertelement <4 x float> %tmp152, float %tmp146, i32 2
   %tmp154 = insertelement <4 x float> %tmp153, float 0.000000e+00, i32 3
   %tmp155 = call float @llvm.r600.dot4(<4 x float> %tmp150, <4 x float> %tmp154)
-  %tmp156 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp156 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp157 = extractelement <4 x float> %tmp156, i32 0
   %tmp158 = fmul float %tmp157, %tmp20
-  %tmp159 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp159 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp160 = extractelement <4 x float> %tmp159, i32 1
   %tmp161 = fmul float %tmp160, %tmp21
-  %tmp162 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp162 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp163 = extractelement <4 x float> %tmp162, i32 2
   %tmp164 = fmul float %tmp163, %tmp22
-  %tmp165 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp165 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp166 = extractelement <4 x float> %tmp165, i32 0
   %tmp167 = fmul float %tmp166, %tmp24
-  %tmp168 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp168 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp169 = extractelement <4 x float> %tmp168, i32 1
   %tmp170 = fmul float %tmp169, %tmp25
-  %tmp171 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp171 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp172 = extractelement <4 x float> %tmp171, i32 2
   %tmp173 = fmul float %tmp172, %tmp26
-  %tmp174 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %tmp174 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
   %tmp175 = extractelement <4 x float> %tmp174, i32 0
   %tmp176 = fmul float %tmp175, %tmp28
-  %tmp177 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %tmp177 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
   %tmp178 = extractelement <4 x float> %tmp177, i32 1
   %tmp179 = fmul float %tmp178, %tmp29
-  %tmp180 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %tmp180 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
   %tmp181 = extractelement <4 x float> %tmp180, i32 2
   %tmp182 = fmul float %tmp181, %tmp30
   %tmp183 = fcmp uge float %tmp140, 0.000000e+00

diff --git a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
index 2984c280b00a..e6c068f5c5b1 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll
@@ -17,11 +17,11 @@
 ; CHECK: Other: 0
 ; CHECK: Section: .text (0x2)
 ; CHECK: }
-define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 {
+define amdgpu_kernel void @test_constant_array_fixup(ptr addrspace(1) nocapture %out, i32 %idx) #0 {
 entry:
-  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx
-  %val = load i32, i32 addrspace(4)* %arrayidx
-  store i32 %val, i32 addrspace(1)* %out, align 4
+  %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(4) @arr, i32 0, i32 %idx
+  %val = load i32, ptr addrspace(4) %arrayidx
+  store i32 %val, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
index 4dafe7d09e9b..39a9413e5dda 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
@@ -47,83 +47,83 @@ main_body:
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = fmul float %5, %0
-  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %8 = extractelement <4 x float> %7, i32 1
   %9 = fmul float %8, %0
-  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %11 = extractelement <4 x float> %10, i32 2
   %12 = fmul float %11, %0
-  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
   %14 = extractelement <4 x float> %13, i32 3
   %15 = fmul float %14, %0
-  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %17 = extractelement <4 x float> %16, i32 0
   %18 = fmul float %17, %1
   %19 = fadd float %18, %6
-  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %20 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %1
   %23 = fadd float %22, %9
-  %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %24 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %25 = extractelement <4 x float> %24, i32 2
   %26 = fmul float %25, %1
   %27 = fadd float %26, %12
-  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %28 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %29 = extractelement <4 x float> %28, i32 3
   %30 = fmul float %29, %1
   %31 = fadd float %30, %15
-  %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %32 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float %33, %2
   %35 = fadd float %34, %19
-  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %36 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %37 = extractelement <4 x float> %36, i32 1
   %38 = fmul float %37, %2
   %39 = fadd float %38, %23
-  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %40 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %41 = extractelement <4 x float> %40, i32 2
   %42 = fmul float %41, %2
   %43 = fadd float %42, %27
-  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %44 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %45 = extractelement <4 x float> %44, i32 3
   %46 = fmul float %45, %2
   %47 = fadd float %46, %31
-  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %49 = extractelement <4 x float> %48, i32 0
   %50 = fmul float %49, %3
   %51 = fadd float %50, %35
-  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %53 = extractelement <4 x float> %52, i32 1
   %54 = fmul float %53, %3
   %55 = fadd float %54, %39
-  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %57 = extractelement <4 x float> %56, i32 2
   %58 = fmul float %57, %3
   %59 = fadd float %58, %43
-  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %61 = extractelement <4 x float> %60, i32 3
   %62 = fmul float %61, %3
   %63 = fadd float %62, %47
-  %64 = load <4 x float>, <4 x float> addrspace(8)* null
+  %64 = load <4 x float>, ptr addrspace(8) null
   %65 = extractelement <4 x float> %64, i32 0
-  %66 = load <4 x float>, <4 x float> addrspace(8)* null
+  %66 = load <4 x float>, ptr addrspace(8) null
   %67 = extractelement <4 x float> %66, i32 1
-  %68 = load <4 x float>, <4 x float> addrspace(8)* null
+  %68 = load <4 x float>, ptr addrspace(8) null
   %69 = extractelement <4 x float> %68, i32 2
-  %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %70 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %71 = extractelement <4 x float> %70, i32 0
-  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %73 = extractelement <4 x float> %72, i32 1
-  %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %74 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
   %75 = extractelement <4 x float> %74, i32 2
-  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %77 = extractelement <4 x float> %76, i32 0
-  %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %78 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %79 = extractelement <4 x float> %78, i32 1
-  %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
   %81 = extractelement <4 x float> %80, i32 2
   %82 = insertelement <4 x float> undef, float %51, i32 0
   %83 = insertelement <4 x float> %82, float %55, i32 1

diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
index f0604c7fe782..f3a785db0b2b 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=cypress -start-after safe-stack | FileCheck %s
 ; Don't crash
 
-define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out) {
 ; CHECK-LABEL: test:
 ; CHECK:       ; %bb.0: ; %bb
 ; CHECK-NEXT:    ALU 4, @6, KC0[CB0:0-32], KC1[]
@@ -23,13 +23,13 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) {
 ; CHECK-NEXT:     LSHR * T1.X, T0.W, literal.y,
 ; CHECK-NEXT:    4(5.605194e-45), 2(2.802597e-45)
 bb:
-  store i64 2, i64 addrspace(1)* %out
-  %tmp = load i64, i64 addrspace(1)* %out
+  store i64 2, ptr addrspace(1) %out
+  %tmp = load i64, ptr addrspace(1) %out
   br label %jump
 
 jump:                                             ; preds = %bb
   %tmp1 = icmp ugt i64 %tmp, 4
   %umax = select i1 %tmp1, i64 %tmp, i64 4
-  store i64 %umax, i64 addrspace(1)* %out
+  store i64 %umax, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/r600.add.ll b/llvm/test/CodeGen/AMDGPU/r600.add.ll
index 73eea3ef2177..a4a7c3d86d80 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.add.ll
@@ -2,24 +2,24 @@
 
 ; FUNC-LABEL: {{^}}s_add_i32:
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32, i32 addrspace(1)* %in
-  %b = load i32, i32 addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+  %a = load i32, ptr addrspace(1) %in
+  %b = load i32, ptr addrspace(1) %b_ptr
   %result = add i32 %a, %b
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}s_add_v2i32:
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+  %a = load <2 x i32>, ptr addrspace(1) %in
+  %b = load <2 x i32>, ptr addrspace(1) %b_ptr
   %result = add <2 x i32> %a, %b
-  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  store <2 x i32> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -28,12 +28,12 @@ define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+define amdgpu_kernel void @s_add_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x i32>, ptr addrspace(1) %in
+  %b = load <4 x i32>, ptr addrspace(1) %b_ptr
   %result = add <4 x i32> %a, %b
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  store <4 x i32> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -46,10 +46,10 @@ define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a
 ; EG: ADD_INT
 ; EG: ADD_INT
 ; EG: ADD_INT
-define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
+define amdgpu_kernel void @s_add_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b) {
 entry:
   %0 = add <8 x i32> %a, %b
-  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
+  store <8 x i32> %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -70,33 +70,33 @@ entry:
 ; EG: ADD_INT
 ; EG: ADD_INT
 ; EG: ADD_INT
-define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
+define amdgpu_kernel void @s_add_v16i32(ptr addrspace(1) %out, <16 x i32> %a, <16 x i32> %b) {
 entry:
   %0 = add <16 x i32> %a, %b
-  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
+  store <16 x i32> %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_add_i32:
-define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x()
-  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
-  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
-  %a = load volatile i32, i32 addrspace(1)* %gep
-  %b = load volatile i32, i32 addrspace(1)* %b_ptr
+  %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
+  %a = load volatile i32, ptr addrspace(1) %gep
+  %b = load volatile i32, ptr addrspace(1) %b_ptr
   %result = add i32 %a, %b
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_add_imm_i32:
-define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x()
-  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
-  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
-  %a = load volatile i32, i32 addrspace(1)* %gep
+  %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
+  %a = load volatile i32, ptr addrspace(1) %gep
   %result = add i32 %a, 123
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -107,10 +107,10 @@ define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1
 ; EG-DAG: ADD_INT
 ; EG-DAG: ADD_INT {{[* ]*}}
 ; EG-NOT: SUB
-define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @add64(ptr addrspace(1) %out, i64 %a, i64 %b) {
 entry:
   %add = add i64 %a, %b
-  store i64 %add, i64 addrspace(1)* %out
+  store i64 %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -126,11 +126,11 @@ entry:
 ; EG-DAG: ADD_INT
 ; EG-DAG: ADD_INT {{[* ]*}}
 ; EG-NOT: SUB
-define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @add64_sgpr_vgpr(ptr addrspace(1) %out, i64 %a, ptr addrspace(1) %in) {
 entry:
-  %0 = load i64, i64 addrspace(1)* %in
+  %0 = load i64, ptr addrspace(1) %in
   %1 = add i64 %a, %0
-  store i64 %1, i64 addrspace(1)* %out
+  store i64 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -142,13 +142,13 @@ entry:
 ; EG-DAG: ADD_INT
 ; EG-DAG: ADD_INT {{[* ]*}}
 ; EG-NOT: SUB
-define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %a, i64 %b, i64 %c) {
 entry:
   %0 = icmp eq i64 %a, 0
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64, i64 addrspace(1)* %in
+  %1 = load i64, ptr addrspace(1) %in
   br label %endif
 
 else:
@@ -157,7 +157,7 @@ else:
 
 endif:
   %3 = phi i64 [%1, %if], [%2, %else]
-  store i64 %3, i64 addrspace(1)* %out
+  store i64 %3, ptr addrspace(1) %out
   ret void
 }
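
These hunks are purely textual: load, store, and getelementptr already spell
their value and element types explicitly, so dropping the pointee type loses
no information. A minimal sketch under opaque pointers (names invented, not
from the test):

; Hypothetical reduced example, not part of this patch.
define void @copy_second_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  ; The 4-byte offset comes from the GEP's i32 element type.
  %p = getelementptr i32, ptr addrspace(1) %in, i32 1
  ; The access types are spelled on the load and on the stored value.
  %v = load i32, ptr addrspace(1) %p
  store i32 %v, ptr addrspace(1) %out
  ret void
}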
 

diff --git a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
index 6caa6173e7a0..acac5c321dfd 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll
@@ -6,10 +6,10 @@
 
 %struct.foo = type {i32, i32, i32}
 
-define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo addrspace(5)* %in, i32 %offset) {
+define amdgpu_kernel void @alu_limits(ptr addrspace(1) %out, ptr addrspace(5) %in, i32 %offset) {
 entry:
-  %ptr = getelementptr inbounds %struct.foo, %struct.foo addrspace(5)* %in, i32 1, i32 2
-  %x = load i32, i32  addrspace(5)*%ptr, align 4
+  %ptr = getelementptr inbounds %struct.foo, ptr addrspace(5) %in, i32 1, i32 2
+  %x = load i32, ptr  addrspace(5) %ptr, align 4
   br label %loop
 loop:
   %i = phi i32 [ 100, %entry ], [ %nexti, %loop ]
@@ -24,6 +24,6 @@ loop:
   br i1 %cond, label %loop, label %end
 end:
   %out_val = add i32 %nextval, 4
-  store i32 %out_val, i32 addrspace(1)* %out, align 4
+  store i32 %out_val, ptr addrspace(1) %out, align 4
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
index a07c0a5d5422..66e38f59f44b 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -mtriple=r600-- -passes='default<O3>,aa-eval' -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1
-define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) {
-  load volatile i8, i8 addrspace(5)* %p
-  load volatile i8, i8 addrspace(999)* %p1
+define amdgpu_kernel void @test(ptr addrspace(5) %p, ptr addrspace(999) %p1) {
+  load volatile i8, ptr addrspace(5) %p
+  load volatile i8, ptr addrspace(999) %p1
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
index 20de997e1bce..e7674ce6d88d 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll
@@ -3,7 +3,7 @@
 
 ; This test just checks that the compiler doesn't crash.
 
-define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
+define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; EG-LABEL: i8ptr_v16i8ptr:
 ; EG:       ; %bb.0: ; %entry
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -20,13 +20,12 @@ define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrs
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
-  %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
-  %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
-  store <16 x i8> %1, <16 x i8> addrspace(1)* %out
+  %0 = load <16 x i8>, ptr addrspace(1) %in
+  store <16 x i8> %0, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: f32_to_v2i16:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -42,13 +41,13 @@ define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addr
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load float, float addrspace(1)* %in, align 4
+  %load = load float, ptr addrspace(1) %in, align 4
   %bc = bitcast float %load to <2 x i16>
-  store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
+  store <2 x i16> %bc, ptr addrspace(1) %out, align 4
   ret void
 }
 
-define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: v2i16_to_f32:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -64,13 +63,13 @@ define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addr
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
+  %load = load <2 x i16>, ptr addrspace(1) %in, align 4
   %bc = bitcast <2 x i16> %load to float
-  store float %bc, float addrspace(1)* %out, align 4
+  store float %bc, ptr addrspace(1) %out, align 4
   ret void
 }
 
-define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: v4i8_to_i32:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -86,13 +85,13 @@ define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspac
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, ptr addrspace(1) %in, align 4
   %bc = bitcast <4 x i8> %load to i32
-  store i32 %bc, i32 addrspace(1)* %out, align 4
+  store i32 %bc, ptr addrspace(1) %out, align 4
   ret void
 }
 
-define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: i32_to_v4i8:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -108,13 +107,13 @@ define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspac
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load i32, i32 addrspace(1)* %in, align 4
+  %load = load i32, ptr addrspace(1) %in, align 4
   %bc = bitcast i32 %load to <4 x i8>
-  store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
+  store <4 x i8> %bc, ptr addrspace(1) %out, align 4
   ret void
 }
 
-define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v2i16_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: v2i16_to_v4i8:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -130,16 +129,16 @@ define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16>
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load <2 x i16>, <2 x i16>  addrspace(1)* %in, align 4
+  %load = load <2 x i16>, ptr  addrspace(1) %in, align 4
   %bc = bitcast <2 x i16> %load to <4 x i8>
-  store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
+  store <4 x i8> %bc, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; This just checks for crash in BUILD_VECTOR/EXTRACT_ELEMENT combine
 ; the stack manipulation is tricky to follow
 ; TODO: This should only use one load
-define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @v4i16_extract_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; EG-LABEL: v4i16_extract_i8:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
@@ -172,14 +171,14 @@ define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> add
 ; EG-NEXT:     MOV * T5.Z, 0.0,
 ; EG-NEXT:     LSHR * T6.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %load = load <4 x i16>, <4 x i16>  addrspace(1)* %in, align 2
+  %load = load <4 x i16>, ptr  addrspace(1) %in, align 2
   %bc = bitcast <4 x i16> %load to <8 x i8>
   %element = extractelement <8 x i8> %bc, i32 5
-  store i8 %element, i8 addrspace(1)* %out
+  store i8 %element, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; EG-LABEL: bitcast_v2i32_to_f64:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
@@ -195,9 +194,9 @@ define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x
 ; EG-NEXT:    ALU clause starting at 9:
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, ptr addrspace(1) %in, align 8
   %bc = bitcast <2 x i32> %val to double
-  store double %bc, double addrspace(1)* %out, align 8
+  store double %bc, ptr addrspace(1) %out, align 8
   ret void
 }
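
The first hunk in this file is the one non-mechanical case: a
pointer-to-pointer bitcast has no opaque-pointer equivalent, so it is deleted
outright and the remaining unnamed values renumber. A sketch of the shape of
that change (function name invented):

; Typed pointers needed a cast to reinterpret the pointee:
;   %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
;   %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
; With opaque pointers the load types the memory directly:
define <16 x i8> @load_as_v16i8(ptr addrspace(1) %in) {
  %v = load <16 x i8>, ptr addrspace(1) %in
  ret <16 x i8> %v
}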
 

diff --git a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
index 71af6a9a4f51..9f2cf98dc3ef 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll
@@ -16,7 +16,7 @@
 ; Pattern a. 32-bit
 ; ---------------------------------------------------------------------------- ;
 
-define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_a0:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -41,11 +41,11 @@ define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_a1_indexzext:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
@@ -87,11 +87,11 @@ define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits,
   %onebit = shl i32 1, %conv
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_a4_commutative:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -116,7 +116,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %val, %mask ; swapped order
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
@@ -124,7 +124,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32
 ; Pattern b. 32-bit
 ; ---------------------------------------------------------------------------- ;
 
-define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_b0:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -149,11 +149,11 @@ define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_b1_indexzext:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
@@ -195,11 +195,11 @@ define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits,
   %notmask = shl i32 -1, %conv
   %mask = xor i32 %notmask, -1
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_b4_commutative:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -224,7 +224,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
   %masked = and i32 %val, %mask ; swapped order
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
@@ -232,7 +232,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32
 ; Pattern c. 32-bit
 ; ---------------------------------------------------------------------------- ;
 
-define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_c0:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -257,11 +257,11 @@ define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_c1_indexzext:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
@@ -311,11 +311,11 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 add
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
   %masked = and i32 %mask, %val
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_c4_commutative:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -340,7 +340,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %val, %mask ; swapped order
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
@@ -348,7 +348,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32
 ; Pattern d. 32-bit.
 ; ---------------------------------------------------------------------------- ;
 
-define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_d0:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -373,11 +373,11 @@ define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %val, %numhighbits
   %masked = lshr i32 %highbitscleared, %numhighbits
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) {
 ; EG-LABEL: bzhi32_d1_indexzext:
 ; EG:       ; %bb.0:
 ; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
@@ -425,6 +425,6 @@ define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 add
   %sh_prom = zext i8 %numhighbits to i32
   %highbitscleared = shl i32 %val, %sh_prom
   %masked = lshr i32 %highbitscleared, %sh_prom
-  store i32 %masked, i32 addrspace(1)* %out
+  store i32 %masked, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
index 3d2f1b4fb9f4..1963574c9494 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll
@@ -6,20 +6,20 @@
 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_soffset:
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_soffset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
@@ -27,516 +27,516 @@ entry:
 ; FIXME: looks like the offset is wrong
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32_huge_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595
 
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32:
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_add_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
 ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_add_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_offset:
 ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32:
 ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_and_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
 ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_and_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
 ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32:
 ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_sub_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
 ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_sub_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_offset:
 ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32:
 ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile max ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
 ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_max_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile max ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
 ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32:
 ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umax_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile umax ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
 ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umax_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile umax ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_offset:
 ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32:
 ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_min_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile min ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
 ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_min_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile min ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
 ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32:
 ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_umin_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile umin ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
 ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_umin_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile umin ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_offset:
 ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32:
 ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_or_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
 ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_or_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xchg_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
 ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr addrspace(1) %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index, i32 %old) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
 ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32(ptr addrspace(1) %out, i32 %in, i32 %old) {
 entry:
-  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+  %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
 ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index, i32 %old) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in seq_cst seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
 ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32_offset(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32:
 ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @atomic_xor_i32(ptr addrspace(1) %out, i32 %in) {
 entry:
-  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+  %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
 ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z
-define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+define amdgpu_kernel void @atomic_xor_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_offset:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, ptr addrspace(1) %out) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  store atomic i32 %in, i32 addrspace(1)* %gep  seq_cst, align 4
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  store atomic i32 %in, ptr addrspace(1) %gep  seq_cst, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_store_i32(i32 %in, ptr addrspace(1) %out) {
 entry:
-  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
+  store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, ptr addrspace(1) %out, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
-  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4
+  store atomic i32 %in, ptr addrspace(1) %gep seq_cst, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
 ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]]
 ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y
-define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, ptr addrspace(1) %out, i64 %index) {
 entry:
-  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
-  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
+  %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index
+  store atomic i32 %in, ptr addrspace(1) %ptr seq_cst, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_1
 ; EG: MEM_RAT ATOMIC_ADD
-define amdgpu_kernel void @atomic_add_1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_add_1(ptr addrspace(1) %out) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 1 seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_neg1
 ; EG: MEM_RAT ATOMIC_ADD
-define amdgpu_kernel void @atomic_add_neg1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_add_neg1(ptr addrspace(1) %out) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 -1 seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_neg1
 ; EG: MEM_RAT ATOMIC_SUB
-define amdgpu_kernel void @atomic_sub_neg1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_sub_neg1(ptr addrspace(1) %out) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 -1 seq_cst
   ret void
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_1
 ; EG: MEM_RAT ATOMIC_SUB
-define amdgpu_kernel void @atomic_sub_1(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @atomic_sub_1(ptr addrspace(1) %out) {
 entry:
-  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
-  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst
+  %gep = getelementptr i32, ptr addrspace(1) %out, i64 4
+  %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 seq_cst
   ret void
 }
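
The atomic updates follow the same rule: atomicrmw, cmpxchg, and atomic
load/store take their access width from the i32 value operands, not from the
pointer, so only the pointer spelling changes. A minimal sketch (function
name invented):

; Hypothetical reduced example, not part of this patch.
define void @atomic_sketch(ptr addrspace(1) %out, i32 %in, i32 %old) {
  ; Both operations are 32-bit because %in and %old are i32.
  %r0 = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst
  %r1 = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst
  ret void
}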

diff --git a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
index 99d55feb740e..1650185a9f07 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
@@ -10,17 +10,16 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 ; Additional check in case the move ends up in the last slot
 ; R600-NOT: MOV * TO.X
 
-define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {
 entry:
   %0 = alloca [2 x i32], addrspace(5)
-  %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
-  %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
-  store i32 0, i32 addrspace(5)* %1
-  store i32 1, i32 addrspace(5)* %2
-  %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
-  %4 = load i32, i32 addrspace(5)* %3
-  %5 = call i32 @llvm.r600.read.tidig.x()
-  %6 = add i32 %4, %5
-  store i32 %6, i32 addrspace(1)* %out
+  %1 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
+  store i32 0, ptr addrspace(5) %0
+  store i32 1, ptr addrspace(5) %1
+  %2 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in
+  %3 = load i32, ptr addrspace(5) %2
+  %4 = call i32 @llvm.r600.read.tidig.x()
+  %5 = add i32 %3, %4
+  store i32 %5, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/r600.sub.ll b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
index 2ded4f64328d..98eec2f08b37 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
@@ -3,48 +3,48 @@
 declare i32 @llvm.r600.read.tidig.x() readnone
 
 ; FUNC-LABEL: {{^}}s_sub_i32:
-define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
   %result = sub i32 %a, %b
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}s_sub_imm_i32:
-define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @s_sub_imm_i32(ptr addrspace(1) %out, i32 %a) {
   %result = sub i32 1234, %a
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_i32:
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32, i32 addrspace(1)* %in
-  %b = load i32, i32 addrspace(1)* %b_ptr
+define amdgpu_kernel void @test_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+  %a = load i32, ptr addrspace(1) %in
+  %b = load i32, ptr addrspace(1) %b_ptr
   %result = sub i32 %a, %b
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_imm_i32:
 ; EG: SUB_INT
-define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %a = load i32, i32 addrspace(1)* %in
+define amdgpu_kernel void @test_sub_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %a = load i32, ptr addrspace(1) %in
   %result = sub i32 123, %a
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_v2i32:
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test_sub_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+  %a = load <2 x i32>, ptr addrspace(1) %in
+  %b = load <2 x i32>, ptr addrspace(1) %b_ptr
   %result = sub <2 x i32> %a, %b
-  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  store <2 x i32> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -53,48 +53,48 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test_sub_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x i32>, ptr addrspace(1) %in
+  %b = load <4 x i32>, ptr addrspace(1) %b_ptr
   %result = sub <4 x i32> %a, %b
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  store <4 x i32> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_i16:
-define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.r600.read.tidig.x()
-  %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
-  %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
-  %a = load volatile i16, i16 addrspace(1)* %gep
-  %b = load volatile i16, i16 addrspace(1)* %b_ptr
+  %gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr i16, ptr addrspace(1) %gep, i32 1
+  %a = load volatile i16, ptr addrspace(1) %gep
+  %b = load volatile i16, ptr addrspace(1) %b_ptr
   %result = sub i16 %a, %b
-  store i16 %result, i16 addrspace(1)* %out
+  store i16 %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_v2i16:
-define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.r600.read.tidig.x()
-  %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
-  %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
-  %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
-  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+  %gep = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
+  %a = load <2 x i16>, ptr addrspace(1) %gep
+  %b = load <2 x i16>, ptr addrspace(1) %b_ptr
   %result = sub <2 x i16> %a, %b
-  store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+  store <2 x i16> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_sub_v4i16:
-define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @test_sub_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.r600.read.tidig.x()
-  %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
-  %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
-  %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep
-  %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
+  %gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
+  %a = load <4 x i16>, ptr addrspace(1) %gep
+  %b = load <4 x i16>, ptr addrspace(1) %b_ptr
   %result = sub <4 x i16> %a, %b
-  store <4 x i16> %result, <4 x i16> addrspace(1)* %out
+  store <4 x i16> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -104,9 +104,9 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16
 ; EG-DAG: SUBB_UINT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT {{[* ]*}}
-define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
+define amdgpu_kernel void @s_sub_i64(ptr addrspace(1) noalias %out, i64 %a, i64 %b) nounwind {
   %result = sub i64 %a, %b
-  store i64 %result, i64 addrspace(1)* %out, align 8
+  store i64 %result, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -116,37 +116,37 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64
 ; EG-DAG: SUBB_UINT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT {{[* ]*}}
-define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
+define amdgpu_kernel void @v_sub_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
-  %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
-  %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
-  %a = load i64, i64 addrspace(1)* %a_ptr
-  %b = load i64, i64 addrspace(1)* %b_ptr
+  %a_ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %tid
+  %b_ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %tid
+  %a = load i64, ptr addrspace(1) %a_ptr
+  %b = load i64, ptr addrspace(1) %b_ptr
   %result = sub i64 %a, %b
-  store i64 %result, i64 addrspace(1)* %out, align 8
+  store i64 %result, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_test_sub_v2i64:
-define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v2i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
-  %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
-  %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
-  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+  %a_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %tid
+  %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %tid
+  %a = load <2 x i64>, ptr addrspace(1) %a_ptr
+  %b = load <2 x i64>, ptr addrspace(1) %b_ptr
   %result = sub <2 x i64> %a, %b
-  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  store <2 x i64> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_test_sub_v4i64:
-define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
+define amdgpu_kernel void @v_test_sub_v4i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
-  %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
-  %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
-  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+  %a_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inA, i32 %tid
+  %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inB, i32 %tid
+  %a = load <4 x i64>, ptr addrspace(1) %a_ptr
+  %b = load <4 x i64>, ptr addrspace(1) %b_ptr
   %result = sub <4 x i64> %a, %b
-  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  store <4 x i64> %result, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
index 03f0539e19be..323a84df4abc 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -3,59 +3,59 @@
 
 ; FUNC-LABEL: {{^}}tgid_x:
 ; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
-define amdgpu_kernel void @tgid_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_x(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.x() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}tgid_y:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
 ; EG: MOV [[REG]].X, T1.Y
-define amdgpu_kernel void @tgid_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_y(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.y() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}tgid_z:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
 ; EG: MOV [[REG]].X, T1.Z
-define amdgpu_kernel void @tgid_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tgid_z(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.z() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}tidig_x:
 ; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
-define amdgpu_kernel void @tidig_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_x(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}tidig_y:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
 ; EG: MOV [[REG]].X, T0.Y
-define amdgpu_kernel void @tidig_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_y(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.y() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}tidig_z:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
 ; EG: MOV [[REG]].X, T0.Z
-define amdgpu_kernel void @tidig_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @tidig_z(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.z() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -65,28 +65,26 @@ entry:
 ; EG-NOT: VTX_READ
 ; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z
 ; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal
-define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
-  %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
-  %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
-  %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
-  %value = load i32, i32 addrspace(7)* %gep
-  store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
+  %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
+  %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 4
+  %value = load i32, ptr addrspace(7) %gep
+  store i32 %value, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}test_implicit_dyn:
 ; 36 prepended implicit bytes + 8(out pointer + in) = 44
 ; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3
-define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
-  %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
-  %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
-  %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
-  %value = load i32, i32 addrspace(7)* %gep
-  store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1 {
+  %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
+  %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 %in
+  %value = load i32, ptr addrspace(7) %gep
+  store i32 %value, ptr addrspace(1) %out
   ret void
 }
 
-declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0
+declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0
 
 declare i32 @llvm.r600.read.tgid.x() #0
 declare i32 @llvm.r600.read.tgid.y() #0

diff  --git a/llvm/test/CodeGen/AMDGPU/r600cfg.ll b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
index 2996a1053da5..f2c7fcb38716 100644
--- a/llvm/test/CodeGen/AMDGPU/r600cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
@@ -83,7 +83,7 @@ ELSE45:                                           ; preds = %ENDIF40
 ENDIF43:                                          ; preds = %ELSE45, %IF44
   %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
   %52 = bitcast i32 %.sink to float
-  %53 = load <4 x float>, <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, ptr addrspace(8) null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = bitcast float %54 to i32
   br label %LOOP47

diff  --git a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
index 4ea503bf6098..cf2831f21fea 100644
--- a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
@@ -5,10 +5,10 @@
 ; EG: MOV [[VAL]], literal.x
 ; EG-NEXT: LSHR
 ; EG-NEXT: 0(
-define amdgpu_kernel void @test_0(i32 %in0, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_0(i32 %in0, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -17,10 +17,10 @@ entry:
 ; EG: MOV [[VAL]], literal.x
 ; EG-NEXT: LSHR
 ; EG-NEXT: 1(
-define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,10 +29,10 @@ entry:
 ; EG: MOV [[VAL]], literal.x
 ; EG-NEXT: LSHR
 ; EG-NEXT: 2(
-define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -43,21 +43,21 @@ attributes #0 = { readnone }
 
 !opencl.kernels = !{!0, !1, !2}
 
-!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50}
+!0 = !{ptr @test_0, !10, !20, !30, !40, !50}
 !10 = !{!"kernel_arg_addr_space", i32 0, i32 1}
 !20 = !{!"kernel_arg_access_qual", !"none", !"none"}
 !30 = !{!"kernel_arg_type", !"sampler_t", !"int*"}
 !40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"}
 !50 = !{!"kernel_arg_type_qual", !"", !""}
 
-!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51}
+!1 = !{ptr @test_1, !11, !21, !31, !41, !51}
 !11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1}
 !21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
 !31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"}
 !41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"}
 !51 = !{!"kernel_arg_type_qual", !"", !"", !""}
 
-!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52}
+!2 = !{ptr @test_2, !12, !22, !32, !42, !52}
 !12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1}
 !22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
 !32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}

diff  --git a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
index 2cdfb06268e4..8495486aa491 100644
--- a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
@@ -33,9 +33,9 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = sdiv i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -72,31 +72,31 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_sdiv3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 33
   %2 = ashr i64 %y, 33
   %result = sdiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_srem3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 33
   %2 = ashr i64 %y, 33
   %result = srem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_INT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 40
   %2 = ashr i64 %y, 40
   %result = sdiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,10 +120,10 @@ define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_INT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 40
   %2 = ashr i64 %y, 40
   %result = srem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/set-dx10.ll b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
index 9a317a87540f..6bec5c87a36c 100644
--- a/llvm/test/CodeGen/AMDGPU/set-dx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
@@ -8,13 +8,13 @@
 ; CHECK: LSHR
 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -22,11 +22,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -34,13 +34,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -48,11 +48,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -60,13 +60,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -74,11 +74,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -86,13 +86,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -100,11 +100,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -112,13 +112,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -126,11 +126,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -138,13 +138,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -152,10 +152,10 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
index 7ac4e1d9fe4b..6eafce674f2a 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
@@ -11,10 +11,10 @@
 ; EG: LSHR {{\*?}} [[ADDR]]
 
 ; Works with the align 2 removed
-define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b
   %x = shl <2 x i32> %c, <i32 6, i32 6>
   %y = ashr <2 x i32> %x, <i32 7, i32 7>
-  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
+  store <2 x i32> %y, ptr addrspace(1) %out, align 2
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
index 7cf380520d42..0e7e9a0fbbc9 100644
--- a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
@@ -12,56 +12,56 @@ main_body:
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float>, <4 x float> addrspace(8)* null
+  %4 = load <4 x float>, ptr addrspace(8) null
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = load <4 x float>, <4 x float> addrspace(8)* null
+  %6 = load <4 x float>, ptr addrspace(8) null
   %7 = extractelement <4 x float> %6, i32 2
-  %8 = load <4 x float>, <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, ptr addrspace(8) null
   %9 = extractelement <4 x float> %8, i32 0
   %10 = fmul float 0.000000e+00, %9
-  %11 = load <4 x float>, <4 x float> addrspace(8)* null
+  %11 = load <4 x float>, ptr addrspace(8) null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %5, %12
-  %14 = load <4 x float>, <4 x float> addrspace(8)* null
+  %14 = load <4 x float>, ptr addrspace(8) null
   %15 = extractelement <4 x float> %14, i32 0
   %16 = fmul float 0.000000e+00, %15
-  %17 = load <4 x float>, <4 x float> addrspace(8)* null
+  %17 = load <4 x float>, ptr addrspace(8) null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float 0.000000e+00, %18
-  %20 = load <4 x float>, <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, ptr addrspace(8) null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fmul float %7, %21
-  %23 = load <4 x float>, <4 x float> addrspace(8)* null
+  %23 = load <4 x float>, ptr addrspace(8) null
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float 0.000000e+00, %24
-  %26 = load <4 x float>, <4 x float> addrspace(8)* null
+  %26 = load <4 x float>, ptr addrspace(8) null
   %27 = extractelement <4 x float> %26, i32 0
   %28 = fmul float 0.000000e+00, %27
-  %29 = load <4 x float>, <4 x float> addrspace(8)* null
+  %29 = load <4 x float>, ptr addrspace(8) null
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float 0.000000e+00, %30
-  %32 = load <4 x float>, <4 x float> addrspace(8)* null
+  %32 = load <4 x float>, ptr addrspace(8) null
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float 0.000000e+00, %33
-  %35 = load <4 x float>, <4 x float> addrspace(8)* null
+  %35 = load <4 x float>, ptr addrspace(8) null
   %36 = extractelement <4 x float> %35, i32 0
   %37 = fmul float 0.000000e+00, %36
-  %38 = load <4 x float>, <4 x float> addrspace(8)* null
+  %38 = load <4 x float>, ptr addrspace(8) null
   %39 = extractelement <4 x float> %38, i32 0
   %40 = fmul float 1.000000e+00, %39
-  %41 = load <4 x float>, <4 x float> addrspace(8)* null
+  %41 = load <4 x float>, ptr addrspace(8) null
   %42 = extractelement <4 x float> %41, i32 0
   %43 = fmul float 0.000000e+00, %42
-  %44 = load <4 x float>, <4 x float> addrspace(8)* null
+  %44 = load <4 x float>, ptr addrspace(8) null
   %45 = extractelement <4 x float> %44, i32 0
   %46 = fmul float 0.000000e+00, %45
-  %47 = load <4 x float>, <4 x float> addrspace(8)* null
+  %47 = load <4 x float>, ptr addrspace(8) null
   %48 = extractelement <4 x float> %47, i32 0
   %49 = fmul float 0.000000e+00, %48
-  %50 = load <4 x float>, <4 x float> addrspace(8)* null
+  %50 = load <4 x float>, ptr addrspace(8) null
   %51 = extractelement <4 x float> %50, i32 0
   %52 = fmul float 0.000000e+00, %51
-  %53 = load <4 x float>, <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, ptr addrspace(8) null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = fmul float 1.000000e+00, %54
   %56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@ main_body:
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = fadd float %0, 2.5
   %3 = fmul float %1, 3.5
-  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = call float @llvm.cos.f32(float %5)
-  %7 = load <4 x float>, <4 x float> addrspace(8)* null
+  %7 = load <4 x float>, ptr addrspace(8) null
   %8 = extractelement <4 x float> %7, i32 0
-  %9 = load <4 x float>, <4 x float> addrspace(8)* null
+  %9 = load <4 x float>, ptr addrspace(8) null
   %10 = extractelement <4 x float> %9, i32 1
   %11 = insertelement <4 x float> undef, float %2, i32 0
   %12 = insertelement <4 x float> %11, float %3, i32 1

diff  --git a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
index 62cab48c65e9..4cf1e64a8bd5 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
@@ -33,9 +33,9 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = udiv i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -72,31 +72,31 @@ define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_udiv3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
   %result = udiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_urem3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
   %result = urem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_UINT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 41
   %2 = lshr i64 %y, 41
   %result = udiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,17 +120,17 @@ define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_UINT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 41
   %2 = lshr i64 %y, 41
   %result = urem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_udiv_k:
-define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @test_udiv_k(ptr addrspace(1) %out, i64 %x) {
   %result = udiv i64 24, %x
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
index b7d766aa395e..90cc40fb6517 100644
--- a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
@@ -1,15 +1,15 @@
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
 
 ; CHECK-LABEL: {{^}}kernel_arg_i64:
-define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
-  store i64 %a, i64 addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind {
+  store i64 %a, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; i64 arg works, v1i64 arg does not.
 ; CHECK-LABEL: {{^}}kernel_arg_v1i64:
-define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
-  store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_v1i64(ptr addrspace(1) %out, <1 x i64> %a) nounwind {
+  store <1 x i64> %a, ptr addrspace(1) %out, align 8
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
index f002a1474e02..f6a642ecccd9 100644
--- a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
@@ -6,9 +6,9 @@
 ; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
 
-define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %v = load i32, i32 addrspace(1)* %in
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %v = load i32, ptr addrspace(1) %in
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -16,9 +16,9 @@ define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)*
 ; EG: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x00,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x00,0x00
 
-define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %v = load <4 x i32>, <4 x i32> addrspace(1)* %in
-  store <4 x i32> %v, <4 x i32> addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch128(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %v = load <4 x i32>, ptr addrspace(1) %in
+  store <4 x i32> %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -26,9 +26,9 @@ define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32>
 ; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
 
-define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace(7)* %in) {
-  %v = load i32, i32 addrspace(7)* %in
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id3(ptr addrspace(1) %out, ptr addrspace(7) %in) {
+  %v = load i32, ptr addrspace(7) %in
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -38,9 +38,9 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace
 
 @t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
 
-define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
-  %a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
-  %v = load i32, i32 addrspace(4)* %a
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id2(ptr addrspace(1) %out, i32 %in) {
+  %a = getelementptr inbounds [4 x i32], ptr addrspace(4) @t, i32 0, i32 %in
+  %v = load i32, ptr addrspace(4) %a
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
index 88ef9fd93c8f..3067ac75340d 100644
--- a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
@@ -5,16 +5,16 @@
 
 ; TODO: enable doubles
 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
-define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
-  %val = load double, double addrspace(1)* %in, align 8
+define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %val = load double, ptr addrspace(1) %in, align 8
   %add = fadd double %val, 4.0
   %bc = bitcast double %add to <2 x i32>
-  store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
+  store <2 x i32> %bc, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
-define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
+define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %if, label %end
@@ -25,12 +25,12 @@ if:
 
 end:
   %phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
-  store <2 x double> %phi, <2 x double> addrspace(1)* %out
+  store <2 x double> %phi, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
-define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
+define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %if, label %end
@@ -41,6 +41,6 @@ if:
 
 end:
   %phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
-  store <2 x i64> %phi, <2 x i64> addrspace(1)* %out
+  store <2 x i64> %phi, ptr addrspace(1) %out
   ret void
 }
