[llvm] b5bc205 - AMDGPU: Convert some bit operation tests to opaque pointers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 15:37:10 PST 2022
Author: Matt Arsenault
Date: 2022-11-29T18:36:53-05:00
New Revision: b5bc205d75ca669c9707792c72c79e3bf0586f89
URL: https://github.com/llvm/llvm-project/commit/b5bc205d75ca669c9707792c72c79e3bf0586f89
DIFF: https://github.com/llvm/llvm-project/commit/b5bc205d75ca669c9707792c72c79e3bf0586f89.diff
LOG: AMDGPU: Convert some bit operation tests to opaque pointers
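The conversion is mechanical: typed pointer arguments such as i32 addrspace(1)* become the opaque ptr addrspace(1), while loads, stores, and getelementptrs keep their value/element types and only drop the pointee type from the pointer operand. A minimal before/after sketch of the pattern (a hypothetical kernel for illustration only, not one of the tests below):

; Typed-pointer form (before the conversion):
;   define amdgpu_kernel void @example(i32 addrspace(1)* %out, i32 %v) {
;     store i32 %v, i32 addrspace(1)* %out, align 4
;     ret void
;   }

; Opaque-pointer form (after the conversion): the parameter type becomes
; "ptr addrspace(1)", and the store still names the stored value type (i32)
; but no longer repeats it on the pointer operand.
define amdgpu_kernel void @example(ptr addrspace(1) %out, i32 %v) {
  store i32 %v, ptr addrspace(1) %out, align 4
  ret void
}
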
Added:
Modified:
llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
llvm/test/CodeGen/AMDGPU/and-gcn.ll
llvm/test/CodeGen/AMDGPU/and.ll
llvm/test/CodeGen/AMDGPU/andorbitset.ll
llvm/test/CodeGen/AMDGPU/andorn2.ll
llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
llvm/test/CodeGen/AMDGPU/bfe_uint.ll
llvm/test/CodeGen/AMDGPU/bfi_int.ll
llvm/test/CodeGen/AMDGPU/bfm.ll
llvm/test/CodeGen/AMDGPU/bitreverse.ll
llvm/test/CodeGen/AMDGPU/bswap.ll
llvm/test/CodeGen/AMDGPU/ctlz.ll
llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/cttz.ll
llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
llvm/test/CodeGen/AMDGPU/fshl.ll
llvm/test/CodeGen/AMDGPU/fshr.ll
llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
llvm/test/CodeGen/AMDGPU/nor.ll
llvm/test/CodeGen/AMDGPU/or.ll
llvm/test/CodeGen/AMDGPU/permute.ll
llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
llvm/test/CodeGen/AMDGPU/shift-i128.ll
llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
llvm/test/CodeGen/AMDGPU/shl.ll
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
llvm/test/CodeGen/AMDGPU/sra.ll
llvm/test/CodeGen/AMDGPU/srl.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
index 88ae414eb05f..fb622f2aaff1 100644
--- a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
+++ b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -5,14 +5,14 @@
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], s[[SHR]]
-define amdgpu_kernel void @alignbit_shr_pat(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -21,18 +21,18 @@ bb:
; GCN-DAG: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], v[[SHR]]
-define amdgpu_kernel void @alignbit_shr_pat_v(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_v(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
%tid = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep1 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %tid
- %tmp = load i64, i64 addrspace(1)* %gep1, align 8
- %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i32 %tid
- %amt = load i32, i32 addrspace(1)* %gep2, align 4
+ %gep1 = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %tid
+ %tmp = load i64, ptr addrspace(1) %gep1, align 8
+ %gep2 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tid
+ %amt = load i32, ptr addrspace(1) %gep2, align 4
%tmp3 = and i32 %amt, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %gep2, align 4
+ store i32 %tmp6, ptr addrspace(1) %gep2, align 4
ret void
}
@@ -41,14 +41,14 @@ bb:
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 30
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -57,14 +57,14 @@ bb:
; GCN: v_lshr_b64
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 63
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = lshr i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -72,12 +72,12 @@ bb:
; GCN: load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GCN: v_alignbit_b32 v{{[0-9]+}}, v[[HI]], v[[LO]], 30
-define amdgpu_kernel void @alignbit_shr_pat_const30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_const30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp5 = lshr i64 %tmp, 30
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -86,12 +86,12 @@ bb:
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: v_alignbit_b32
-define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1) {
+define amdgpu_kernel void @alignbit_shr_pat_wrong_const33(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp5 = lshr i64 %tmp, 33
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/and-gcn.ll b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
index ef11ae87267e..07458fcdbdd9 100644
--- a/llvm/test/CodeGen/AMDGPU/and-gcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
@@ -3,21 +3,21 @@
; FUNC-LABEL: {{^}}v_and_i64_br:
; SI: s_and_b64
-define amdgpu_kernel void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i64_br(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
entry:
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %if, label %endif
if:
- %a = load i64, i64 addrspace(1)* %aptr, align 8
- %b = load i64, i64 addrspace(1)* %bptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
+ %b = load i64, ptr addrspace(1) %bptr, align 8
%and = and i64 %a, %b
br label %endif
endif:
%tmp1 = phi i64 [%and, %if], [0, %entry]
- store i64 %tmp1, i64 addrspace(1)* %out, align 8
+ store i64 %tmp1, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index 78aa2cc01c6d..8ca046688be5 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -11,12 +11,12 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
-define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = and <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -32,28 +32,28 @@ define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
-define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @test4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = and <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}s_and_i32:
; SI: s_and_b32
-define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}s_and_constant_i32:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
-define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @s_and_constant_i32(ptr addrspace(1) %out, i32 %a) {
%and = and i32 %a, 1234567
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
@@ -66,13 +66,13 @@ define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x12d687
; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x12d687
; SI: buffer_store_dword [[VK]]
-define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i32_0(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
; Just to stop future replacement of copy to vgpr + store with VALU op.
%foo = add i32 %and, %b
- store volatile i32 %foo, i32 addrspace(1)* %out
- store volatile i32 1234567, i32 addrspace(1)* %out
+ store volatile i32 %foo, ptr addrspace(1) %out
+ store volatile i32 1234567, ptr addrspace(1) %out
ret void
}
@@ -83,25 +83,25 @@ define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out
; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, 0x12d687
; SI: v_mov_b32_e32 [[VADD:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[VADD]]
-define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i32_1(ptr addrspace(1) %out, i32 %a, i32 %b) {
%and = and i32 %a, 1234567
%foo = add i32 %and, 1234567
%bar = add i32 %foo, %b
- store volatile i32 %bar, i32 addrspace(1)* %out
+ store volatile i32 %bar, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i32_vgpr_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep.a
- %b = load i32, i32 addrspace(1)* %gep.b
+ %gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep.a
+ %b = load i32, ptr addrspace(1) %gep.b
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
@@ -109,13 +109,13 @@ define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrs
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
-define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i32_sgpr_vgpr(ptr addrspace(1) %out, i32 %a, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %b = load i32, i32 addrspace(1)* %gep.b
+ %gep.b = getelementptr i32, ptr addrspace(1) %bptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %b = load i32, ptr addrspace(1) %gep.b
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
@@ -123,54 +123,54 @@ define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i
; SI-DAG: s_load_dword [[SA:s[0-9]+]]
; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
-define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
+define amdgpu_kernel void @v_and_i32_vgpr_sgpr(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i32 %b) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep.a
+ %gep.a = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep.a
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %gep.out
+ store i32 %and, ptr addrspace(1) %gep.out
ret void
}
; FUNC-LABEL: {{^}}v_and_constant_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_constant_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 1234567
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, 64
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
-define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
- %a = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %a, -16
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}s_and_i64
; SI: s_and_b64
-define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_and_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -181,9 +181,9 @@ define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: s_and_b32 [[AND_TRUNC:s[0-9]+]], [[AND]], 1{{$}}
; SI: v_mov_b32_e32 [[V_AND_TRUNC:v[0-9]+]], [[AND_TRUNC]]
; SI: buffer_store_byte [[V_AND_TRUNC]]
-define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
+define amdgpu_kernel void @s_and_i1(ptr addrspace(1) %out, i1 %a, i1 %b) {
%and = and i1 %a, %b
- store i1 %and, i1 addrspace(1)* %out
+ store i1 %and, ptr addrspace(1) %out
ret void
}
@@ -191,9 +191,9 @@ define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_and_constant_i64(ptr addrspace(1) %out, i64 %a) {
%and = and i64 %a, 549756338176
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -201,11 +201,11 @@ define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[KLO]]:[[KHI]]]
-define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @s_and_multi_use_constant_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%and0 = and i64 %a, 549756338176
%and1 = and i64 %b, 549756338176
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -215,9 +215,9 @@ define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i32, i64 %a) {
+define amdgpu_kernel void @s_and_32_bit_constant_i64(ptr addrspace(1) %out, i32, i64 %a) {
%and = and i64 %a, 1234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -233,29 +233,29 @@ define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i32
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
+define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(ptr addrspace(1) %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
%shl.a = shl i64 %a, 1
%shl.b = shl i64 %b, 1
%and0 = and i64 %shl.a, 62
%and1 = and i64 %shl.b, 62
%add0 = add i64 %and0, %c
%add1 = add i64 %and1, %c
- store volatile i64 %add0, i64 addrspace(1)* %out
- store volatile i64 %add1, i64 addrspace(1)* %out
+ store volatile i64 %add0, ptr addrspace(1) %out
+ store volatile i64 %add1, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v_and_i64:
; SI: v_and_b32
; SI: v_and_b32
-define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+define amdgpu_kernel void @v_and_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
- %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
- %b = load i64, i64 addrspace(1)* %gep.b, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
+ %gep.b = getelementptr i64, ptr addrspace(1) %bptr, i32 %tid
+ %b = load i64, ptr addrspace(1) %gep.b, align 8
%and = and i64 %a, %b
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -263,12 +263,12 @@ define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1231231234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -281,13 +281,13 @@ define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrsp
; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, v[[HI1]]
; SI: buffer_store_dwordx2
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load volatile i64, i64 addrspace(1)* %aptr
- %b = load volatile i64, i64 addrspace(1)* %aptr
+define amdgpu_kernel void @v_and_multi_use_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
+ %a = load volatile i64, ptr addrspace(1) %aptr
+ %b = load volatile i64, ptr addrspace(1) %aptr
%and0 = and i64 %a, 1231231234567
%and1 = and i64 %b, 1231231234567
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -301,13 +301,13 @@ define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out,
; SI-NOT: and
; SI: buffer_store_dwordx2 v[[[RESLO0]]
; SI: buffer_store_dwordx2 v[[[RESLO1]]
-define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load volatile i64, i64 addrspace(1)* %aptr
- %b = load volatile i64, i64 addrspace(1)* %aptr
+define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
+ %a = load volatile i64, ptr addrspace(1) %aptr
+ %b = load volatile i64, ptr addrspace(1) %aptr
%and0 = and i64 %a, 63
%and1 = and i64 %b, 63
- store volatile i64 %and0, i64 addrspace(1)* %out
- store volatile i64 %and1, i64 addrspace(1)* %out
+ store volatile i64 %and0, ptr addrspace(1) %out
+ store volatile i64 %and1, ptr addrspace(1) %out
ret void
}
@@ -317,12 +317,12 @@ define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out
; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_i64_32_bit_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 1234567
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -332,12 +332,12 @@ define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, 64
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -348,12 +348,12 @@ define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addr
; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
; SI-NOT: and
; SI: buffer_store_dwordx2 v[[[VAL_LO]]:[[VAL_HI]]]
-define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_and_inline_neg_imm_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.a, align 8
+ %gep.a = getelementptr i64, ptr addrspace(1) %aptr, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.a, align 8
%and = and i64 %a, -8
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -363,9 +363,9 @@ define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
; SI-NOT: and
; SI: buffer_store_dword
-define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 64
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -377,11 +377,11 @@ define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
-define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i32, i64 %b) {
+define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a, i32, i64 %b) {
%shl = shl i64 %a, 1
%and = and i64 %shl, 64
%add = add i64 %and, %b
- store i64 %add, i64 addrspace(1)* %out, align 8
+ store i64 %add, ptr addrspace(1) %out, align 8
ret void
}
@@ -391,9 +391,9 @@ define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %o
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 1
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -406,9 +406,9 @@ define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 ad
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_1.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4607182418800017408
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -421,9 +421,9 @@ define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13830554455654793216
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -436,9 +436,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_0.5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4602678819172646912
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -451,9 +451,9 @@ define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13826050856027422720
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -464,9 +464,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_2.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4611686018427387904
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -477,9 +477,9 @@ define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13835058055282163712
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -492,9 +492,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4616189618054758400
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -507,9 +507,9 @@ define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64
; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13839561654909534208
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -524,9 +524,9 @@ define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 1082130432
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -537,9 +537,9 @@ define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out,
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, -1065353216
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -550,9 +550,9 @@ define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %o
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 4647714815446351872
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
@@ -563,9 +563,9 @@ define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %
; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
; SI-NOT: and
; SI: buffer_store_dwordx2
-define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
%and = and i64 %a, 13871086852301127680
- store i64 %and, i64 addrspace(1)* %out, align 8
+ store i64 %and, ptr addrspace(1) %out, align 8
ret void
}
attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index 196e813c71a7..a7d61582598f 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -2,49 +2,49 @@
; SI-LABEL: {{^}}s_clear_msb:
; SI: s_bitset0_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_clear_msb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 2147483647
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_msb:
; SI: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_set_msb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 2147483648
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_clear_lsb:
; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, -2
-define amdgpu_kernel void @s_clear_lsb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 4294967294
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_lsb:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
-define amdgpu_kernel void @s_set_lsb(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 1
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_clear_midbit:
; SI: s_bitset0_b32 s{{[0-9]+}}, 8
-define amdgpu_kernel void @s_clear_midbit(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
%x = and i32 %in, 4294967039
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
; SI-LABEL: {{^}}s_set_midbit:
; SI: s_bitset1_b32 s{{[0-9]+}}, 8
-define amdgpu_kernel void @s_set_midbit(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
%x = or i32 %in, 256
- store i32 %x, i32 addrspace(1)* %out
+ store i32 %x, ptr addrspace(1) %out
ret void
}
@@ -58,7 +58,7 @@ define void @bitset_verifier_error() local_unnamed_addr #0 {
bb:
%i = call float @llvm.fabs.f32(float undef) #0
%i1 = bitcast float %i to i32
- store i32 %i1, i32 addrspace(1)* @gv
+ store i32 %i1, ptr addrspace(1) @gv
br label %bb2
bb2:
diff --git a/llvm/test/CodeGen/AMDGPU/andorn2.ll b/llvm/test/CodeGen/AMDGPU/andorn2.ll
index cf642155ecef..845ff782eddd 100644
--- a/llvm/test/CodeGen/AMDGPU/andorn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorn2.ll
@@ -6,44 +6,44 @@
; GCN-LABEL: {{^}}scalar_andn2_i32_one_use
; GCN: s_andn2_b32
define amdgpu_kernel void @scalar_andn2_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%nb = xor i32 %b, -1
%r0.val = and i32 %a, %nb
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_andn2_i64_one_use
; GCN: s_andn2_b64
define amdgpu_kernel void @scalar_andn2_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%nb = xor i64 %b, -1
%r0.val = and i64 %a, %nb
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_orn2_i32_one_use
; GCN: s_orn2_b32
define amdgpu_kernel void @scalar_orn2_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%nb = xor i32 %b, -1
%r0.val = or i32 %a, %nb
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
; GCN-LABEL: {{^}}scalar_orn2_i64_one_use
; GCN: s_orn2_b64
define amdgpu_kernel void @scalar_orn2_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%nb = xor i64 %b, -1
%r0.val = or i64 %a, %nb
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -51,12 +51,12 @@ entry:
; GCN: v_not_b32
; GCN: v_and_b32
define amdgpu_kernel void @vector_andn2_i32_s_v_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %v, -1
%r0.val = and i32 %s, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -64,12 +64,12 @@ entry:
; GCN: s_not_b32
; GCN: v_and_b32
define amdgpu_kernel void @vector_andn2_i32_v_s_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %s, -1
%r0.val = and i32 %v, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -77,12 +77,12 @@ entry:
; GCN: v_not_b32
; GCN: v_or_b32
define amdgpu_kernel void @vector_orn2_i32_s_v_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %v, -1
%r0.val = or i32 %s, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -90,12 +90,12 @@ entry:
; GCN: s_not_b32
; GCN: v_or_b32
define amdgpu_kernel void @vector_orn2_i32_v_s_one_use(
- i32 addrspace(1)* %r0, i32 %s) {
+ ptr addrspace(1) %r0, i32 %s) {
entry:
%v = call i32 @llvm.amdgcn.workitem.id.x() #1
%not = xor i32 %s, -1
%r0.val = or i32 %v, %not
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 654aac83b9cc..e909cf785026 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -22,9 +22,9 @@
; CIVI: s_and_b32
; CIVI: s_or_b32
-define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_ashr_v2i16(ptr addrspace(1) %out, i32, <2 x i16> %lhs, i32, <2 x i16> %rhs) #0 {
%result = ashr <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
@@ -44,16 +44,16 @@ define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, i32, <2 x
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -61,14 +61,14 @@ define amdgpu_kernel void @v_ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX9: s_load_dword [[RHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -76,42 +76,42 @@ define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX9: s_load_dword [[LHS:s[0-9]+]]
; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
-define amdgpu_kernel void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @ashr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
; GCN-LABEL: {{^}}ashr_imm_v_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4
-define amdgpu_kernel void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> <i16 -4, i16 -4>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
; GCN-LABEL: {{^}}ashr_v_imm_v2i16:
; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]]
-define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = ashr <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -128,16 +128,16 @@ define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
@@ -146,14 +146,14 @@ define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN: {{buffer|flat|global}}_store_dwordx2
-define amdgpu_kernel void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = ashr <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index 1c72c08a34c7..2ac65f8a1618 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -40,19 +40,19 @@ define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -99,20 +99,20 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -144,15 +144,15 @@ define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -191,16 +191,16 @@ define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = lshr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -238,19 +238,19 @@ define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #1 {
+define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -297,20 +297,20 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %id.x
- %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %src = load volatile i32, i32 addrspace(1)* %in0.gep
- %width = load volatile i32, i32 addrspace(1)* %in0.gep
+ %in0.gep = getelementptr i32, ptr addrspace(1) %in0, i32 %id.x
+ %in1.gep = getelementptr i32, ptr addrspace(1) %in1, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %src = load volatile i32, ptr addrspace(1) %in0.gep
+ %width = load volatile i32, ptr addrspace(1) %in0.gep
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -342,15 +342,15 @@ define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
+ store i32 %bfe, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
+define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -389,16 +389,16 @@ define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out,
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
%sub = sub i32 32, %width
%shl = shl i32 %src, %sub
%bfe = ashr i32 %shl, %sub
- store i32 %bfe, i32 addrspace(1)* %out.gep
- store volatile i32 %shl, i32 addrspace(1)* undef
+ store i32 %bfe, ptr addrspace(1) %out.gep
+ store volatile i32 %shl, ptr addrspace(1) undef
ret void
}
-define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: s_sbfe_or_shl_shl_uniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -430,18 +430,18 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(i32 addrspace(1)* %out,
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %in0
- %b0 = load i32, i32 addrspace(1) * %in1
+ %a0 = load i32, ptr addrspace(1) %in0
+ %b0 = load i32, ptr addrspace(1) %in1
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 17
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; TODO ashr(or(shl(x,c1),shl(y,c2)),c1) -> sign_extend_inreg(or(x,shl(y,c2-c1))) iff c2 >= c1
-define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x, i32 addrspace(1)* %y) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_nonuniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -477,18 +477,18 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(i32 addrspace(1)* %o
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %x
- %b0 = load i32, i32 addrspace(1) * %y
+ %a0 = load i32, ptr addrspace(1) %x
+ %b0 = load i32, ptr addrspace(1) %y
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 19
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
; Don't fold as 'other shl' amount is less than the sign_extend_inreg type.
-define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x, i32 addrspace(1)* %y) {
+define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_toosmall_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -524,13 +524,13 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(i32 addrspace(1)* %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
- %a0 = load i32, i32 addrspace(1) * %x
- %b0 = load i32, i32 addrspace(1) * %y
+ %a0 = load i32, ptr addrspace(1) %x
+ %b0 = load i32, ptr addrspace(1) %y
%a1 = shl i32 %a0, 17
%b1 = shl i32 %b0, 16
%or = or i32 %a1, %b1
%result = ashr i32 %or, 17
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
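For reference, the rule applied mechanically throughout these tests is purely syntactic: the pointee type moves off the pointer type and survives only as the explicit value type on getelementptr, load, and store, so i32 addrspace(1)* becomes ptr addrspace(1) with no other change. A minimal sketch of the converted shape (the function name @opaque_ptr_sketch is illustrative, not a test from the tree):

define amdgpu_kernel void @opaque_ptr_sketch(ptr addrspace(1) %out, i32 %idx) {
  ; was: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 %idx
  %gep = getelementptr i32, ptr addrspace(1) %out, i32 %idx
  ; was: store i32 %idx, i32 addrspace(1)* %gep
  store i32 %idx, ptr addrspace(1) %gep
  ret void
}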
diff --git a/llvm/test/CodeGen/AMDGPU/bfe_uint.ll b/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
index 2c8c9a5ec932..4b91dbdd6ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe_uint.ll
@@ -2,11 +2,11 @@
; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
-define amdgpu_kernel void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
+define amdgpu_kernel void @bfe_def(ptr addrspace(1) %out, i32 %x) {
entry:
%0 = lshr i32 %x, 5
%1 = and i32 %0, 15 ; 0xf
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
@@ -17,10 +17,10 @@ entry:
; CHECK: {{^}}bfe_shift:
; CHECK-NOT: BFE_UINT
-define amdgpu_kernel void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
+define amdgpu_kernel void @bfe_shift(ptr addrspace(1) %out, i32 %x) {
entry:
%0 = lshr i32 %x, 16
%1 = and i32 %0, 65535 ; 0xffff
- store i32 %1, i32 addrspace(1)* %out
+ store i32 %1, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index 6c0a183289e1..445bc7f586ec 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -8,7 +8,7 @@
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_def_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_def_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -85,7 +85,7 @@ entry:
%1 = and i32 %z, %0
%2 = and i32 %y, %x
%3 = or i32 %1, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -131,7 +131,7 @@ entry:
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ch:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -207,7 +207,7 @@ entry:
%0 = xor i32 %y, %z
%1 = and i32 %x, %0
%2 = xor i32 %z, %1
- store i32 %2, i32 addrspace(1)* %out
+ store i32 %2, ptr addrspace(1) %out
ret void
}
@@ -469,7 +469,7 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @s_bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
; GFX7-LABEL: s_bfi_sha256_ma:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -551,7 +551,7 @@ entry:
%1 = or i32 %x, %z
%2 = and i32 %y, %1
%3 = or i32 %0, %2
- store i32 %3, i32 addrspace(1)* %out
+ store i32 %3, ptr addrspace(1) %out
ret void
}
@@ -1633,7 +1633,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
%and1 = and i64 %not.a, %mask
%bitselect = or i64 %and0, %and1
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1721,7 +1721,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1809,7 +1809,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
%bitselect = xor i64 %and, %mask
%scalar.use = add i64 %bitselect, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
@@ -1904,6 +1904,6 @@ entry:
%or1 = or i64 %and0, %and1
%scalar.use = add i64 %or1, 10
- store i64 %scalar.use, i64 addrspace(1)* undef
+ store i64 %scalar.use, ptr addrspace(1) undef
ret void
}
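The bfi_int.ll hunks all reduce to the bitfield-insert identity quoted from the ISA docs, (y & x) | (z & ~x), plus its SHA-256 Ch and Ma rearrangements. A compact, self-contained sketch of that core shape in opaque-pointer form (@bfi_pattern_sketch is an illustrative name only):

define amdgpu_kernel void @bfi_pattern_sketch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
entry:
  %not.x = xor i32 %x, -1        ; ~x
  %sel.y = and i32 %y, %x        ; bits taken from y where x is set
  %sel.z = and i32 %z, %not.x    ; bits taken from z where x is clear
  %bfi   = or i32 %sel.y, %sel.z ; (y & x) | (z & ~x)
  store i32 %bfi, ptr addrspace(1) %out
  ret void
}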
diff --git a/llvm/test/CodeGen/AMDGPU/bfm.ll b/llvm/test/CodeGen/AMDGPU/bfm.ll
index 2c9c692c42a4..789a64220e62 100644
--- a/llvm/test/CodeGen/AMDGPU/bfm.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfm.ll
@@ -2,7 +2,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: s_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -29,11 +29,11 @@ define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y)
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
- store i32 %c, i32 addrspace(1)* %out
+ store i32 %c, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: s_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -59,11 +59,11 @@ define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x)
; VI-NEXT: s_endpgm
%a = shl i32 1, %x
%b = sub i32 %a, 1
- store i32 %b, i32 addrspace(1)* %out
+ store i32 %b, ptr addrspace(1) %out
ret void
}
-define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define void @v_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: v_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,11 +86,11 @@ define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
- store i32 %c, i32 addrspace(1)* %out
+ store i32 %c, ptr addrspace(1) %out
ret void
}
-define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+define void @v_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: v_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -112,7 +112,7 @@ define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
; VI-NEXT: s_setpc_b64 s[30:31]
%a = shl i32 1, %x
%b = sub i32 %a, 1
- store i32 %b, i32 addrspace(1)* %out
+ store i32 %b, ptr addrspace(1) %out
ret void
}
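The bfm.ll pattern builds a bitfield mask from a width %x and an offset %y. Restating it with a numeric trace in the comments (the values and the name @bfm_sketch are chosen for illustration, not taken from the test):

define amdgpu_kernel void @bfm_sketch(ptr addrspace(1) %out, i32 %x, i32 %y) {
  %ones  = shl i32 1, %x      ; x = 4 -> 0x10
  %mask  = sub i32 %ones, 1   ;          0x0f, i.e. x low bits set
  %field = shl i32 %mask, %y  ; y = 8 -> 0xf00, the mask placed at bit y
  store i32 %field, ptr addrspace(1) %out
  ret void
}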
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 79c95d930a4f..3090b4b14709 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -16,7 +16,7 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
-define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
+define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #0 {
; SI-LABEL: s_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -57,11 +57,11 @@ define amdgpu_kernel void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val)
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
- store i16 %brev, i16 addrspace(1)* %out
+ store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -114,13 +114,13 @@ define amdgpu_kernel void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrsp
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
- store i16 %brev, i16 addrspace(1)* %out
+ store i16 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
+define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) #0 {
; SI-LABEL: s_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -157,11 +157,11 @@ define amdgpu_kernel void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GISEL-NEXT: flat_store_dword v[0:1], v2
; GISEL-NEXT: s_endpgm
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
- store i32 %brev, i32 addrspace(1)* %out
+ store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -214,14 +214,14 @@ define amdgpu_kernel void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GISEL-NEXT: flat_store_dword v[0:1], v2
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %gep
+ %gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %gep
%brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
- store i32 %brev, i32 addrspace(1)* %out
+ store i32 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i32(ptr addrspace(1) noalias %out, <2 x i32> %val) #0 {
; SI-LABEL: s_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -265,11 +265,11 @@ define amdgpu_kernel void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
- store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -325,14 +325,14 @@ define amdgpu_kernel void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep
+ %gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %gep
%brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
- store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
+define amdgpu_kernel void @s_brev_i64(ptr addrspace(1) noalias %out, i64 %val) #0 {
; SI-LABEL: s_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -373,11 +373,11 @@ define amdgpu_kernel void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val)
; GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GISEL-NEXT: s_endpgm
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
- store i64 %brev, i64 addrspace(1)* %out
+ store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -433,14 +433,14 @@ define amdgpu_kernel void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i64, i64 addrspace(1)* %valptr, i32 %tid
- %val = load i64, i64 addrspace(1)* %gep
+ %gep = getelementptr i64, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i64, ptr addrspace(1) %gep
%brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
- store i64 %brev, i64 addrspace(1)* %out
+ store i64 %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
+define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64> %val) #0 {
; SI-LABEL: s_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -489,11 +489,11 @@ define amdgpu_kernel void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GISEL-NEXT: s_endpgm
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
- store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
+define amdgpu_kernel void @v_brev_v2i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 {
; SI-LABEL: v_brev_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -555,10 +555,10 @@ define amdgpu_kernel void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2
; GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
; GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x i64> , <2 x i64> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %gep
+ %gep = getelementptr <2 x i64> , ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i64>, ptr addrspace(1) %gep
%brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
- store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %brev, ptr addrspace(1) %out
ret void
}
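llvm.bitreverse mirrors the entire bit string of its operand; the i64 form is equivalent to reversing each 32-bit half and swapping the halves, which is why the wider cases above work dword by dword. A self-contained i32 example in opaque-pointer form (names are illustrative, not taken from the test):

declare i32 @llvm.bitreverse.i32(i32)

define amdgpu_kernel void @brev_sketch(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %val  = load i32, ptr addrspace(1) %in, align 4
  %brev = call i32 @llvm.bitreverse.i32(i32 %val)  ; e.g. 0x00000001 -> 0x80000000
  store i32 %brev, ptr addrspace(1) %out, align 4
  ret void
}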
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 368463c1ac5c..7202ca648258 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -15,7 +15,7 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
declare i48 @llvm.bswap.i48(i48) #1
-define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -45,13 +45,13 @@ define amdgpu_kernel void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %in, align 4
+ %val = load i32, ptr addrspace(1) %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
- store i32 %bswap, i32 addrspace(1)* %out, align 4
+ store i32 %bswap, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -85,13 +85,13 @@ define amdgpu_kernel void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, ptr addrspace(1) %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
- store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %bswap, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -133,13 +133,13 @@ define amdgpu_kernel void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; VI-NEXT: v_perm_b32 v0, 0, s8, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, ptr addrspace(1) %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
- store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %bswap, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v8i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -199,13 +199,13 @@ define amdgpu_kernel void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; VI-NEXT: s_endpgm
- %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, ptr addrspace(1) %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
- store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %bswap, ptr addrspace(1) %out, align 32
ret void
}
-define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -239,13 +239,13 @@ define amdgpu_kernel void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(
; VI-NEXT: v_perm_b32 v0, 0, s3, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load i64, i64 addrspace(1)* %in, align 8
+ %val = load i64, ptr addrspace(1) %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
- store i64 %bswap, i64 addrspace(1)* %out, align 8
+ store i64 %bswap, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -287,13 +287,13 @@ define amdgpu_kernel void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; VI-NEXT: v_perm_b32 v0, 0, s9, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_endpgm
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, ptr addrspace(1) %in, align 16
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
- store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
+ store <2 x i64> %bswap, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
+define amdgpu_kernel void @test_bswap_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9
@@ -353,9 +353,9 @@ define amdgpu_kernel void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[12:15], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; VI-NEXT: s_endpgm
- %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, ptr addrspace(1) %in, align 32
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
- store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
+ store <4 x i64> %bswap, ptr addrspace(1) %out, align 32
ret void
}
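Byte swap has the analogous decomposition: bswap of an i64 equals the bswap of each 32-bit half with the halves exchanged, which is the usual shape of the dword-pair lowering. A sketch of that identity under opaque pointers (all names illustrative, and the expansion is only meant to show the equivalence, not the exact instruction selection):

declare i32 @llvm.bswap.i32(i32)

define amdgpu_kernel void @bswap_i64_sketch(ptr addrspace(1) %out, i64 %v) {
  %lo       = trunc i64 %v to i32
  %hi.64    = lshr i64 %v, 32
  %hi       = trunc i64 %hi.64 to i32
  %lo.swap  = call i32 @llvm.bswap.i32(i32 %lo)
  %hi.swap  = call i32 @llvm.bswap.i32(i32 %hi)
  %hi.part  = zext i32 %lo.swap to i64  ; swapped low half becomes the high half
  %hi.shift = shl i64 %hi.part, 32
  %lo.part  = zext i32 %hi.swap to i64  ; swapped high half becomes the low half
  %res      = or i64 %hi.shift, %lo.part ; equal to llvm.bswap.i64(%v)
  store i64 %res, ptr addrspace(1) %out
  ret void
}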
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ddc6734335dc..d4af1d73f66b 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -20,7 +20,7 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -99,11 +99,11 @@ define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -201,14 +201,14 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -320,14 +320,14 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
- store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ctlz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -466,14 +466,14 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
- store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %ctlz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -588,13 +588,13 @@ define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace
; GFX11-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %valptr
+ %val = load i8, ptr addrspace(1) %valptr
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
- store i8 %ctlz, i8 addrspace(1)* %out
+ store i8 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_ctlz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -691,11 +691,11 @@ define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
- store i64 %ctlz, i64 addrspace(1)* %out
+ store i64 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_ctlz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -784,11 +784,11 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX11-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -906,15 +906,15 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
- store i64 %ctlz, i64 addrspace(1)* %out.gep
+ store i64 %ctlz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1032,16 +1032,16 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1138,16 +1138,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1244,17 +1244,17 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; TODO: Should be able to eliminate select here as well.
-define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_eq_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1366,16 +1366,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %ctlz, 32
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1487,16 +1487,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1600,16 +1600,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1726,16 +1726,16 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %ctlz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_i7_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1845,11 +1845,11 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
- %val = load i7, i7 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i7, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i7, ptr addrspace(1) %valptr.gep
%ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
%cmp = icmp eq i7 %val, 0
%sel = select i1 %cmp, i7 -1, i7 %ctlz
- store i7 %sel, i7 addrspace(1)* %out
+ store i7 %sel, ptr addrspace(1) %out
ret void
}
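The difference between ctlz.ll above and ctlz_zero_undef.ll below is the i1 flag on the intrinsic: with i1 false the result is fully defined (ctlz of 0 is 32 for i32), while with i1 true the result for a zero input is undefined (poison), so the backend is free to use the raw hardware count without a zero guard. A side-by-side sketch with illustrative names:

declare i32 @llvm.ctlz.i32(i32, i1)

define amdgpu_kernel void @ctlz_flag_sketch(ptr addrspace(1) %out, i32 %val) {
  %defined = call i32 @llvm.ctlz.i32(i32 %val, i1 false) ; ctlz(0) = 32, fully defined
  %zu      = call i32 @llvm.ctlz.i32(i32 %val, i1 true)  ; ctlz(0) is poison, no zero guard needed
  %sum     = add i32 %defined, %zu
  store i32 %sum, ptr addrspace(1) %out
  ret void
}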
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 0d02bf8c01a2..354f5b954659 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -16,7 +16,7 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -63,11 +63,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -134,14 +134,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ store i32 %ctlz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -212,14 +212,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
- store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %ctlz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -298,14 +298,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
- store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %ctlz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -388,14 +388,14 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %in.gep
+ %in.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %in.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
- store i8 %ctlz, i8 addrspace(1)* %out
+ store i8 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -457,11 +457,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
- store i64 %ctlz, i64 addrspace(1)* %out
+ store i64 %ctlz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_ctlz_zero_undef_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -518,11 +518,11 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: s_endpgm
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -604,15 +604,15 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
- store i64 %ctlz, i64 addrspace(1)* %out.gep
+ store i64 %ctlz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -694,16 +694,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
%trunc = trunc i64 %ctlz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -773,16 +773,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -852,16 +852,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -941,16 +941,16 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noa
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1042,18 +1042,18 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspa
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store volatile i32 %sel, i32 addrspace(1)* %out
- store volatile i1 %cmp, i1 addrspace(1)* undef
+ store volatile i32 %sel, ptr addrspace(1) %out
+ store volatile i1 %cmp, ptr addrspace(1) undef
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1127,17 +1127,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noal
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 0, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1211,17 +1211,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noal
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 0
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Compare on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1296,17 +1296,17 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp eq i32 %val, 1
%sel = select i1 %cmp, i32 0, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; Selected on wrong constant
-define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1381,11 +1381,11 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
%cmp = icmp ne i32 %val, 1
%sel = select i1 %cmp, i32 %ctlz, i32 0
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
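The change in the tests above is purely syntactic: each typed pointer such as i32 addrspace(1)* becomes the opaque ptr addrspace(1), and the element type is carried only by the load, store, and getelementptr instructions. A minimal sketch of the rewrite, using a hypothetical function name that is not part of this commit:

; typed-pointer form
define void @example_typed(i32 addrspace(1)* %p) {
  %gep = getelementptr i32, i32 addrspace(1)* %p, i32 1
  %v = load i32, i32 addrspace(1)* %gep
  store i32 %v, i32 addrspace(1)* %p
  ret void
}

; opaque-pointer form: the pointer no longer names a pointee type,
; so only the memory instructions spell out i32
define void @example_opaque(ptr addrspace(1) %p) {
  %gep = getelementptr i32, ptr addrspace(1) %p, i32 1
  %v = load i32, ptr addrspace(1) %gep
  store i32 %v, ptr addrspace(1) %p
  ret void
}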
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index fd8c73926650..e871b80cbe29 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -19,7 +19,7 @@ declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -84,11 +84,11 @@ define amdgpu_kernel void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val)
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -172,14 +172,14 @@ define amdgpu_kernel void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -274,14 +274,14 @@ define amdgpu_kernel void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
- store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %cttz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -398,14 +398,14 @@ define amdgpu_kernel void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
; GFX10-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
- store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %cttz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @v_cttz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i8(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i8:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -496,13 +496,13 @@ define amdgpu_kernel void @v_cttz_i8(i8 addrspace(1)* noalias %out, i8 addrspace
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
; GFX10-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %valptr
+ %val = load i8, ptr addrspace(1) %valptr
%cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
- store i8 %cttz, i8 addrspace(1)* %out
+ store i8 %cttz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_cttz_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
; SI-LABEL: s_cttz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x13
@@ -582,11 +582,11 @@ define amdgpu_kernel void @s_cttz_i64(i64 addrspace(1)* noalias %out, [8 x i32],
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
- store i64 %cttz, i64 addrspace(1)* %out
+ store i64 %cttz, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_i64_trunc(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_cttz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -660,11 +660,11 @@ define amdgpu_kernel void @s_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: s_endpgm
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%trunc = trunc i64 %cttz to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_cttz_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_cttz_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -765,15 +765,15 @@ define amdgpu_kernel void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
; GFX10-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
- store i64 %cttz, i64 addrspace(1)* %out.gep
+ store i64 %cttz, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+define amdgpu_kernel void @v_cttz_i64_trunc(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
; SI-LABEL: v_cttz_i64_trunc:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -874,16 +874,16 @@ define amdgpu_kernel void @v_cttz_i64_trunc(i32 addrspace(1)* noalias %out, i64
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
- %val = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+ %val = load i64, ptr addrspace(1) %in.gep
%cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false)
%trunc = trunc i64 %cttz to i32
- store i32 %trunc, i32 addrspace(1)* %out.gep
+ store i32 %trunc, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -968,16 +968,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %cttz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1062,17 +1062,17 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %cttz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
; TODO: Should be able to eliminate select here as well.
-define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1168,16 +1168,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %cttz, 32
%sel = select i1 %cmp, i32 -1, i32 %cttz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1273,16 +1273,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %cttz, 32
%sel = select i1 %cmp, i32 %cttz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1373,16 +1373,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
- %val = load i8, i8 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i8, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i8, ptr addrspace(1) %valptr.gep
%cttz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %cttz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+ define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1480,16 +1480,16 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %valptr
+ %val = load i16, ptr addrspace(1) %valptr
%cttz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %cttz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_i7_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1585,11 +1585,11 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out,
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
- %val = load i7, i7 addrspace(1)* %valptr.gep
+ %valptr.gep = getelementptr i7, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i7, ptr addrspace(1) %valptr.gep
%cttz = call i7 @llvm.cttz.i7(i7 %val, i1 false) nounwind readnone
%cmp = icmp eq i7 %val, 0
%sel = select i1 %cmp, i7 -1, i7 %cttz
- store i7 %sel, i7 addrspace(1)* %out
+ store i7 %sel, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index ba3ed974a34a..9e4c88f9da40 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -13,7 +13,7 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -60,11 +60,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX9-GISEL-NEXT: s_endpgm
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -131,14 +131,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out,
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
- %val = load i32, i32 addrspace(1)* %in.gep, align 4
+ %in.gep = getelementptr i32, ptr addrspace(1) %valptr, i32 %tid
+ %val = load i32, ptr addrspace(1) %in.gep, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_v2i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -209,14 +209,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <2 x i32>, ptr addrspace(1) %in.gep, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
- store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
+ store <2 x i32> %cttz, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_v4i32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) nounwind {
; SI-LABEL: v_cttz_zero_undef_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -295,14 +295,14 @@ define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali
; GFX9-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
- %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x i32>, ptr addrspace(1) %valptr, i32 %tid
+ %val = load <4 x i32>, ptr addrspace(1) %in.gep, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
- store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
+ store <4 x i32> %cttz, ptr addrspace(1) %out, align 16
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noalias %out, i8 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i8_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -369,11 +369,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noa
%cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i8 %val, 0
%ret = select i1 %cttz_ret, i8 %cttz, i8 32
- store i8 %cttz, i8 addrspace(1)* %out, align 4
+ store i8 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) noalias %out, i16 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i16_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -440,11 +440,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(i16 addrspace(1)* n
%cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i16 %val, 0
%ret = select i1 %cttz_ret, i16 %cttz, i16 32
- store i16 %cttz, i16 addrspace(1)* %out, align 4
+ store i16 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(ptr addrspace(1) noalias %out, i32 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i32_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -493,11 +493,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n
%cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i32 %val, 0
%ret = select i1 %cttz_ret, i32 %cttz, i32 32
- store i32 %cttz, i32 addrspace(1)* %out, align 4
+ store i32 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
+define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(ptr addrspace(1) noalias %out, i64 %val) nounwind {
; SI-LABEL: s_cttz_zero_undef_i64_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -560,11 +560,11 @@ define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
%cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i64 %val, 0
%ret = select i1 %cttz_ret, i64 %cttz, i64 32
- store i64 %cttz, i64 addrspace(1)* %out, align 4
+ store i64 %cttz, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i8_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -643,15 +643,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noa
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %val = load i8, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i8 %val, 0
%ret = select i1 %cttz_ret, i8 %cttz, i8 32
- store i8 %ret, i8 addrspace(1)* %out, align 4
+ store i8 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i16_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -744,15 +744,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %val = load i16, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i16 %val, 0
%ret = select i1 %cttz_ret, i16 %cttz, i16 32
- store i16 %ret, i16 addrspace(1)* %out, align 4
+ store i16 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i32_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -865,15 +865,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 32, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i32 %val, 0
%ret = select i1 %cttz_ret, i32 %cttz, i32 32
- store i32 %ret, i32 addrspace(1)* %out, align 4
+ store i32 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_zero_undef_i64_with_select:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1057,15 +1057,15 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* n
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 64, v0, vcc
; GFX9-GISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i64, i64 addrspace(1)* %arrayidx, align 1
+ %val = load i64, ptr addrspace(1) %arrayidx, align 1
%cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone
%cttz_ret = icmp ne i64 %val, 0
%ret = select i1 %cttz_ret, i64 %cttz, i64 64
- store i64 %ret, i64 addrspace(1)* %out, align 4
+ store i64 %ret, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1179,15 +1179,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp eq i32 %val, 0
%sel = select i1 %cmp, i32 -1, i32 %ctlz
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1301,15 +1301,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %val, 0
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i32_sel_ne_bitwidth:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1431,15 +1431,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+ %val = load i32, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone
%cmp = icmp ne i32 %ctlz, 32
%sel = select i1 %cmp, i32 %ctlz, i32 -1
- store i32 %sel, i32 addrspace(1)* %out
+ store i32 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+ define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i8_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1518,15 +1518,15 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i8, i8 addrspace(1)* %arrayidx, align 1
+ %val = load i8, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone
%cmp = icmp eq i8 %val, 0
%sel = select i1 %cmp, i8 -1, i8 %ctlz
- store i8 %sel, i8 addrspace(1)* %out
+ store i8 %sel, ptr addrspace(1) %out
ret void
}
- define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+ define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
; SI-LABEL: v_cttz_i16_sel_eq_neg1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1620,11 +1620,11 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
- %val = load i16, i16 addrspace(1)* %arrayidx, align 1
+ %val = load i16, ptr addrspace(1) %arrayidx, align 1
%ctlz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone
%cmp = icmp eq i16 %val, 0
%sel = select i1 %cmp, i16 -1, i16 %ctlz
- store i16 %sel, i16 addrspace(1)* %out
+ store i16 %sel, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
index 0fa06b87eba2..919924730724 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
@@ -3,45 +3,45 @@
; GCN-LABEL: and_zext:
; GCN: v_and_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @and_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @and_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = and i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: or_zext:
; GCN: v_or_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @or_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @or_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = or i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: xor_zext:
; GCN: v_xor_b32_e32 [[VAL16:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[VAL16]]
-define amdgpu_kernel void @xor_zext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @xor_zext(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i16, i16 addrspace(1)* %in, i32 %id
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %ptr
+ %ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %ptr
%c = add i16 %a, %b
%val16 = xor i16 %c, %a
%val32 = zext i16 %val16 to i32
- store i32 %val32, i32 addrspace(1)* %out
+ store i32 %val32, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index ed94e9ac7f3c..9773f4d7908a 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -10,7 +10,7 @@ declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
-define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @fshl_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
; SI-LABEL: fshl_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -105,11 +105,11 @@ define amdgpu_kernel void @fshl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @fshl_i32_imm(ptr addrspace(1) %in, i32 %x, i32 %y) {
; SI-LABEL: fshl_i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -176,11 +176,11 @@ define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+define amdgpu_kernel void @fshl_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
; SI-LABEL: fshl_v2i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -307,11 +307,11 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @fshl_v2i32_imm(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y) {
; SI-LABEL: fshl_v2i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -393,11 +393,11 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+define amdgpu_kernel void @fshl_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SI-LABEL: fshl_v4i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -584,11 +584,11 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @fshl_v4i32_imm(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y) {
; SI-LABEL: fshl_v4i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -690,12 +690,12 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
; (a ^ b) | a --> a | b
-define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
+define amdgpu_kernel void @orxor2or1(ptr addrspace(1) %in, i32 %a, i32 %b) {
; SI-LABEL: orxor2or1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -786,6 +786,6 @@ define amdgpu_kernel void @orxor2or1(i32 addrspace(1)* %in, i32 %a, i32 %b) {
%fshl = call i32 @llvm.fshl.i32(i32 %or, i32 %xor, i32 7)
%cond = icmp eq i32 %fshl, 0
%r = select i1 %cond, i32 %a, i32 %b
- store i32 %r, i32 addrspace(1)* %in
+ store i32 %r, ptr addrspace(1) %in
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index ece946d355e2..5d3be8db524b 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -19,7 +19,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare i24 @llvm.fshr.i24(i24, i24, i24)
declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>)
-define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @fshr_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
; SI-LABEL: fshr_i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -97,11 +97,11 @@ define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+define amdgpu_kernel void @fshr_i32_imm(ptr addrspace(1) %in, i32 %x, i32 %y) {
; SI-LABEL: fshr_i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -168,11 +168,11 @@ define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
; GFX11-NEXT: s_endpgm
entry:
%0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
- store i32 %0, i32 addrspace(1)* %in
+ store i32 %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
+define amdgpu_kernel void @fshr_v2i32(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
; SI-LABEL: fshr_v2i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -269,11 +269,11 @@ define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+define amdgpu_kernel void @fshr_v2i32_imm(ptr addrspace(1) %in, <2 x i32> %x, <2 x i32> %y) {
; SI-LABEL: fshr_v2i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0xb
@@ -355,11 +355,11 @@ define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
- store <2 x i32> %0, <2 x i32> addrspace(1)* %in
+ store <2 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+define amdgpu_kernel void @fshr_v4i32(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; SI-LABEL: fshr_v4i32:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -486,11 +486,11 @@ define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
-define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+define amdgpu_kernel void @fshr_v4i32_imm(ptr addrspace(1) %in, <4 x i32> %x, <4 x i32> %y) {
; SI-LABEL: fshr_v4i32_imm:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0xd
@@ -590,7 +590,7 @@ define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
; GFX11-NEXT: s_endpgm
entry:
%0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>)
- store <4 x i32> %0, <4 x i32> addrspace(1)* %in
+ store <4 x i32> %0, ptr addrspace(1) %in
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
index 6ac24a99fbb1..a955523a2b5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
+++ b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
-define amdgpu_kernel void @divergent_or3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_or3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_or3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -16,20 +16,19 @@ define amdgpu_kernel void @divergent_or3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = or i32 %i5, %i4
%i8 = or i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_or3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_or3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_or3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -47,20 +46,19 @@ define amdgpu_kernel void @divergent_or3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = or i64 %i5, %i4
%i8 = or i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @divergent_and3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_and3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_and3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -76,20 +74,19 @@ define amdgpu_kernel void @divergent_and3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = and i32 %i5, %i4
%i8 = and i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_and3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_and3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_and3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -110,20 +107,19 @@ define amdgpu_kernel void @divergent_and3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = and i64 %i5, %i4
%i8 = and i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @divergent_xor3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_xor3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_xor3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -138,20 +134,19 @@ define amdgpu_kernel void @divergent_xor3_b32(<3 x i32> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %i2, align 16
+ %i2 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i32>, ptr addrspace(1) %i2, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = xor i32 %i5, %i4
%i8 = xor i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %i2, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %i2, align 16
ret void
}
-define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @divergent_xor3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: divergent_xor3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -170,20 +165,19 @@ define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
bb:
%i = tail call i32 @llvm.amdgcn.workitem.id.x()
%i1 = zext i32 %i to i64
- %i2 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 %i1
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %i2, align 32
+ %i2 = getelementptr inbounds <3 x i64>, ptr addrspace(1) %arg, i64 %i1
+ %i3 = load <3 x i64>, ptr addrspace(1) %i2, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = xor i64 %i5, %i4
%i8 = xor i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %i2, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %i2, align 32
ret void
}
-define amdgpu_kernel void @uniform_or3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_or3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_or3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -197,19 +191,18 @@ define amdgpu_kernel void @uniform_or3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = or i32 %i5, %i4
%i8 = or i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_or3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_or3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_or3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -225,19 +218,18 @@ define amdgpu_kernel void @uniform_or3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = or i64 %i5, %i4
%i8 = or i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
-define amdgpu_kernel void @uniform_and3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_and3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_and3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -251,19 +243,18 @@ define amdgpu_kernel void @uniform_and3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = and i32 %i5, %i4
%i8 = and i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_and3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_and3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_and3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -279,19 +270,18 @@ define amdgpu_kernel void @uniform_and3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = and i64 %i5, %i4
%i8 = and i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
-define amdgpu_kernel void @uniform_xor3_b32(<3 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_xor3_b32(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_xor3_b32:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -305,19 +295,18 @@ define amdgpu_kernel void @uniform_xor3_b32(<3 x i32> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dword v0, v1, s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i32>, <3 x i32> addrspace(1)* %arg, align 16
+ %i3 = load <3 x i32>, ptr addrspace(1) %arg, align 16
%i4 = extractelement <3 x i32> %i3, i64 0
%i5 = extractelement <3 x i32> %i3, i64 1
%i6 = extractelement <3 x i32> %i3, i64 2
%i7 = xor i32 %i5, %i4
%i8 = xor i32 %i7, %i6
%i9 = xor i32 %i8, -1
- %i10 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %arg, i64 0, i64 0
- store i32 %i9, i32 addrspace(1)* %i10, align 16
+ store i32 %i9, ptr addrspace(1) %arg, align 16
ret void
}
-define amdgpu_kernel void @uniform_xor3_b64(<3 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @uniform_xor3_b64(ptr addrspace(1) %arg) {
; GCN-LABEL: uniform_xor3_b64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
@@ -333,15 +322,14 @@ define amdgpu_kernel void @uniform_xor3_b64(<3 x i64> addrspace(1)* %arg) {
; GCN-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GCN-NEXT: s_endpgm
bb:
- %i3 = load <3 x i64>, <3 x i64> addrspace(1)* %arg, align 32
+ %i3 = load <3 x i64>, ptr addrspace(1) %arg, align 32
%i4 = extractelement <3 x i64> %i3, i64 0
%i5 = extractelement <3 x i64> %i3, i64 1
%i6 = extractelement <3 x i64> %i3, i64 2
%i7 = xor i64 %i5, %i4
%i8 = xor i64 %i7, %i6
%i9 = xor i64 %i8, -1
- %i10 = getelementptr inbounds <3 x i64>, <3 x i64> addrspace(1)* %arg, i64 0, i64 0
- store i64 %i9, i64 addrspace(1)* %i10, align 32
+ store i64 %i9, ptr addrspace(1) %arg, align 32
ret void
}
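The fused-bitlogic.ll hunks above additionally drop the all-zero getelementptr that preceded each scalar store. With opaque pointers, a gep whose indices are all zero yields the same pointer value as its base, so the store can go through the original pointer directly. A small sketch of the pattern, with illustrative names that do not appear in the test:

; typed-pointer form: an element gep reinterprets the vector pointer
%elt = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %vec.ptr, i64 0, i64 0
store i32 %x, i32 addrspace(1)* %elt, align 16

; opaque-pointer form: the zero-index gep is the identity, so it is omitted
store i32 %x, ptr addrspace(1) %vec.ptr, align 16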
diff --git a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
index 61a4f8fb32cd..2821f8e696f0 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl-add-u64.ll
@@ -37,20 +37,20 @@ define i64 @lshl_add_u64_vvv(i64 %v, i64 %s, i64 %a) {
define amdgpu_kernel void @lshl_add_u64_s2v(i64 %v) {
; GCN-LABEL: lshl_add_u64_s2v:
; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 2, v[{{[0-9:]+}}]
- %a = load i64, i64* undef
+ %a = load i64, ptr undef
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
define amdgpu_kernel void @lshl_add_u64_v2s(i64 %a) {
; GCN-LABEL: lshl_add_u64_v2s:
; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 2, s[{{[0-9:]+}}]
- %v = load i64, i64* undef
+ %v = load i64, ptr undef
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -61,7 +61,7 @@ define amdgpu_kernel void @lshl_add_u64_s2s(i64 %v, i64 %a) {
; GCN: s_addc_u32
%shl = shl i64 %v, 2
%add = add i64 %shl, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -75,18 +75,18 @@ define i64 @add_u64_vv(i64 %v, i64 %a) {
define amdgpu_kernel void @add_u64_sv(i64 %v) {
; GCN-LABEL: add_u64_sv:
; GCN: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
- %a = load i64, i64* undef
+ %a = load i64, ptr undef
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
define amdgpu_kernel void @add_u64_vs(i64 %a) {
; GCN-LABEL: add_u64_vs:
; GCN: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
- %v = load i64, i64* undef
+ %v = load i64, ptr undef
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
@@ -95,14 +95,14 @@ define amdgpu_kernel void @add_u64_ss(i64 %v, i64 %a) {
; GCN: s_add_u32
; GCN: s_addc_u32 s1, s1, s3
%add = add i64 %v, %a
- store i64 %add, i64* undef
+ store i64 %add, ptr undef
ret void
}
-define i32 @lshl_add_u64_gep(i32 *%p, i64 %a) {
+define i32 @lshl_add_u64_gep(ptr %p, i64 %a) {
; GCN-LABEL: lshl_add_u64_gep:
; GCN: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
- %gep = getelementptr inbounds i32, i32* %p, i64 %a
- %v = load i32, i32* %gep
+ %gep = getelementptr inbounds i32, ptr %p, i64 %a
+ %v = load i32, ptr %gep
ret i32 %v
}
diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
index 2d2d6161869d..7830bfc6ac7f 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @zext_shl64_to_32(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_to_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -17,11 +17,11 @@ define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i3
%and = and i32 %x, 1073741823
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @sext_shl64_to_32(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_to_32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -38,11 +38,11 @@ define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i3
%and = and i32 %x, 536870911
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @zext_shl64_overflow(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: zext_shl64_overflow:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -60,11 +60,11 @@ define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out,
%and = and i32 %x, 2147483647
%ext = zext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) {
+define amdgpu_kernel void @sext_shl64_overflow(ptr addrspace(1) nocapture %out, i32 %x) {
; GCN-LABEL: sext_shl64_overflow:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -82,11 +82,11 @@ define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out,
%and = and i32 %x, 2147483647
%ext = sext i32 %and to i64
%shl = shl i64 %ext, 2
- store i64 %shl, i64 addrspace(1)* %out, align 4
+ store i64 %shl, ptr addrspace(1) %out, align 4
ret void
}
-define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
; GCN-LABEL: mulu24_shl64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -104,12 +104,12 @@ bb:
%tmp1 = and i32 %tmp, 6
%mulconv = mul nuw nsw i32 %tmp1, 7
%tmp2 = zext i32 %mulconv to i64
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2
- store i32 0, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp2
+ store i32 0, ptr addrspace(1) %tmp3, align 4
ret void
}
-define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) {
+define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) {
; GCN-LABEL: muli24_shl64:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -132,14 +132,14 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = sext i32 %tmp to i64
- %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2
- %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4
+ %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp2
+ %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4
%tmp5 = or i32 %tmp4, -8388608
%tmp6 = mul nsw i32 %tmp5, -7
%tmp7 = zext i32 %tmp6 to i64
%tmp8 = shl nuw nsw i64 %tmp7, 3
- %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2
- store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8
+ %tmp9 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %tmp2
+ store i64 %tmp8, ptr addrspace(1) %tmp9, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
index dede51844b1c..f885325d38f7 100644
--- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_lshr_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -71,11 +71,11 @@ define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = lshr <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_lshr_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -152,17 +152,17 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = lshr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: lshr_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -246,15 +246,15 @@ define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @lshr_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: lshr_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -338,15 +338,15 @@ define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -422,15 +422,15 @@ define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> <i16 8, i16 8>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -502,15 +502,15 @@ define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = lshr <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_lshr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_lshr_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -600,17 +600,17 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: lshr_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -690,11 +690,11 @@ define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = lshr <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/nor.ll b/llvm/test/CodeGen/AMDGPU/nor.ll
index 6781ab6745f0..df8875b5563b 100644
--- a/llvm/test/CodeGen/AMDGPU/nor.ll
+++ b/llvm/test/CodeGen/AMDGPU/nor.ll
@@ -6,11 +6,11 @@
; GCN-LABEL: {{^}}scalar_nor_i32_one_use
; GCN: s_nor_b32
define amdgpu_kernel void @scalar_nor_i32_one_use(
- i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, i32 %a, i32 %b) {
entry:
%or = or i32 %a, %b
%r0.val = xor i32 %or, -1
- store i32 %r0.val, i32 addrspace(1)* %r0
+ store i32 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -20,24 +20,24 @@ entry:
; GCN: s_not_b32
; GCN: s_add_i32
define amdgpu_kernel void @scalar_nor_i32_mul_use(
- i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) {
+ ptr addrspace(1) %r0, ptr addrspace(1) %r1, i32 %a, i32 %b) {
entry:
%or = or i32 %a, %b
%r0.val = xor i32 %or, -1
%r1.val = add i32 %or, %a
- store i32 %r0.val, i32 addrspace(1)* %r0
- store i32 %r1.val, i32 addrspace(1)* %r1
+ store i32 %r0.val, ptr addrspace(1) %r0
+ store i32 %r1.val, ptr addrspace(1) %r1
ret void
}
; GCN-LABEL: {{^}}scalar_nor_i64_one_use
; GCN: s_nor_b64
define amdgpu_kernel void @scalar_nor_i64_one_use(
- i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, i64 %a, i64 %b) {
entry:
%or = or i64 %a, %b
%r0.val = xor i64 %or, -1
- store i64 %r0.val, i64 addrspace(1)* %r0
+ store i64 %r0.val, ptr addrspace(1) %r0
ret void
}
@@ -48,13 +48,13 @@ entry:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @scalar_nor_i64_mul_use(
- i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) {
+ ptr addrspace(1) %r0, ptr addrspace(1) %r1, i64 %a, i64 %b) {
entry:
%or = or i64 %a, %b
%r0.val = xor i64 %or, -1
%r1.val = add i64 %or, %a
- store i64 %r0.val, i64 addrspace(1)* %r0
- store i64 %r1.val, i64 addrspace(1)* %r1
+ store i64 %r0.val, ptr addrspace(1) %r0
+ store i64 %r1.val, ptr addrspace(1) %r1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index c2e615e97132..8f82a53cafe1 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -9,12 +9,12 @@
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
- %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @or_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = or <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
@@ -28,37 +28,37 @@ define amdgpu_kernel void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addr
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
- %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
+define amdgpu_kernel void @or_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = or <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
-define amdgpu_kernel void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @scalar_or_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
- store i32 %or, i32 addrspace(1)* %out
+ store i32 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
- %loada = load i32, i32 addrspace(1)* %a
+define amdgpu_kernel void @vector_or_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, i32 %b) {
+ %loada = load i32, ptr addrspace(1) %a
%or = or i32 %loada, %b
- store i32 %or, i32 addrspace(1)* %out
+ store i32 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
-define amdgpu_kernel void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
+define amdgpu_kernel void @scalar_or_literal_i32(ptr addrspace(1) %out, i32 %a) {
%or = or i32 %a, 99999
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
@@ -68,9 +68,9 @@ define amdgpu_kernel void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a)
; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
-define amdgpu_kernel void @scalar_or_literal_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_literal_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, 4261135838621753
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -82,12 +82,12 @@ define amdgpu_kernel void @scalar_or_literal_i64(i64 addrspace(1)* %out, [8 x i3
; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3039
; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xf237b
-define amdgpu_kernel void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @scalar_or_literal_multi_use_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
%or = or i64 %a, 4261135838621753
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
%foo = add i64 %b, 4261135838621753
- store volatile i64 %foo, i64 addrspace(1)* undef
+ store volatile i64 %foo, ptr addrspace(1) undef
ret void
}
@@ -101,9 +101,9 @@ define amdgpu_kernel void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %ou
; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
; SI-NOT: or_b32
; SI: buffer_store_dwordx2 v[[[VLO]]:[[VHI]]]
-define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, 63
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -111,11 +111,11 @@ define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x
; SI-NOT: or_b32
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 63
; SI-NOT: or_b32
-define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%or = or i64 %a, 63
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
%foo = add i64 %b, 63
- store volatile i64 %foo, i64 addrspace(1)* undef
+ store volatile i64 %foo, ptr addrspace(1) undef
ret void
}
@@ -125,27 +125,27 @@ define amdgpu_kernel void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)*
; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
; SI: buffer_store_dwordx2 v[[[V_LO]]:[[V_HI]]]
-define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @scalar_or_neg_inline_imm_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
%or = or i64 %a, -8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
-define amdgpu_kernel void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
+define amdgpu_kernel void @vector_or_literal_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i32, ptr addrspace(1) %a, align 4
%or = or i32 %loada, 65535
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
-define amdgpu_kernel void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32, i32 addrspace(1)* %a, align 4
+define amdgpu_kernel void @vector_or_inline_immediate_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i32, ptr addrspace(1) %a, align 4
%or = or i32 %loada, 4
- store i32 %or, i32 addrspace(1)* %out, align 4
+ store i32 %or, ptr addrspace(1) %out, align 4
ret void
}
@@ -154,30 +154,30 @@ define amdgpu_kernel void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI: s_or_b64
-define amdgpu_kernel void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @scalar_or_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
- %loadb = load i64, i64 addrspace(1)* %b, align 8
+define amdgpu_kernel void @vector_or_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
+ %loadb = load i64, ptr addrspace(1) %b, align 8
%or = or i64 %loada, %loadb
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
-define amdgpu_kernel void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
- %loada = load i64, i64 addrspace(1)* %a
+define amdgpu_kernel void @scalar_vector_or_i64(ptr addrspace(1) %out, ptr addrspace(1) %a, i64 %b) {
+ %loada = load i64, ptr addrspace(1) %a
%or = or i64 %loada, %b
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -186,10 +186,10 @@ define amdgpu_kernel void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addr
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_loadimm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, 22470723082367
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -200,10 +200,10 @@ define amdgpu_kernel void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 add
; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
; SI: buffer_store_dwordx2 v[[[LO_RESULT]]:[[HI_VREG]]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, 8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -213,10 +213,10 @@ define amdgpu_kernel void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspa
; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
; SI: buffer_store_dwordx2 v[[[RES_LO]]:[[RES_HI]]]
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_neg_inline_imm(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, -8
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -226,10 +226,10 @@ define amdgpu_kernel void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out,
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffff38, v[[LO_VREG]]
; SI: buffer_store_dwordx2
; SI: s_endpgm
-define amdgpu_kernel void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64, i64 addrspace(1)* %a, align 8
+define amdgpu_kernel void @vector_or_i64_neg_literal(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) {
+ %loada = load i64, ptr addrspace(1) %a, align 8
%or = or i64 %loada, -200
- store i64 %or, i64 addrspace(1)* %out
+ store i64 %or, ptr addrspace(1) %out
ret void
}
@@ -239,10 +239,10 @@ define amdgpu_kernel void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
-define amdgpu_kernel void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @trunc_i64_or_to_i32(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
%add = or i64 %b, %a
%trunc = trunc i64 %add to i32
- store i32 %trunc, i32 addrspace(1)* %out, align 8
+ store i32 %trunc, ptr addrspace(1) %out, align 8
ret void
}
@@ -250,14 +250,14 @@ define amdgpu_kernel void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, [8 x i32]
; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], vcc
-define amdgpu_kernel void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float, float addrspace(1)* %in0
- %b = load float, float addrspace(1)* %in1
+define amdgpu_kernel void @or_i1(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
+ %a = load float, ptr addrspace(1) %in0
+ %b = load float, ptr addrspace(1) %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
%result = zext i1 %or to i32
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
@@ -267,10 +267,10 @@ define amdgpu_kernel void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in
; SI: s_cmp_eq_u32
; SI: s_cselect_b64 [[C2:[^,]+]], -1, 0
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], [[C1]], [[C2]]
-define amdgpu_kernel void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_kernel void @s_or_i1(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) {
%cmp0 = icmp eq i32 %a, %b
%cmp1 = icmp eq i32 %c, %d
%or = or i1 %cmp0, %cmp1
- store i1 %or, i1 addrspace(1)* %out
+ store i1 %or, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/permute.ll b/llvm/test/CodeGen/AMDGPU/permute.ll
index b0b684e770ff..c6671fa51795 100644
--- a/llvm/test/CodeGen/AMDGPU/permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh8_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -19,16 +19,16 @@ define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %ar
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 8
%tmp3 = and i32 %arg1, 255
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsr24_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsr24_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -46,16 +46,16 @@ define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %a
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = lshr i32 %tmp, 24
%tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -74,17 +74,17 @@ define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %a
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
%tmp5 = xor i32 %tmp4, -2147483648
- store i32 %tmp5, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp5, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -102,16 +102,16 @@ define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, -16711936
%tmp3 = and i32 %arg1, 16711935
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh8_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -128,16 +128,16 @@ define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 8
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @lsh16_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh16_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -155,16 +155,16 @@ define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%tmp3 = lshr i32 %arg1, 24
%tmp4 = or i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_xor_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_xor_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -182,17 +182,17 @@ define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %ar
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = and i32 %tmp, -16776961
%tmp3 = and i32 %arg1, 16776960
%tmp4 = xor i32 %tmp2, %tmp3
- store i32 %tmp4, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp4, ptr addrspace(1) %gep, align 4
ret void
}
; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
-define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -212,17 +212,17 @@ define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%and = and i32 %tmp, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, -65536
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @and_or_and_shl(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and_shl:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -240,17 +240,17 @@ define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%tmp3 = and i32 %arg1, 65535
%tmp4 = or i32 %tmp2, %tmp3
%and = and i32 %tmp4, 4278190335
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
ret void
}
-define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @or_and_or(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: or_and_or:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -268,17 +268,17 @@ define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%or1 = or i32 %tmp, 16776960 ; 0x00ffff00
%or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
%and = and i32 %or1, %or2
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
ret void
}
; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
-define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_ffff0500(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff0500:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -304,21 +304,21 @@ define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %load = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %load = load i32, ptr addrspace(1) %gep, align 4
%mask1 = or i32 %arg1, 32768 ; 0x8000
%mask2 = or i32 %load, 4
%and = and i32 %mask2, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
%v = and i32 %tmp3, 4294934532 ; 0xffff8004
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
-define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_050c0c00(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_050c0c00:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -341,20 +341,20 @@ define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %tmp = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %tmp = load i32, ptr addrspace(1) %gep, align 4
%tmp2 = shl i32 %tmp, 16
%mask = or i32 %arg1, 4
%tmp3 = and i32 %mask, 65535
%tmp4 = or i32 %tmp2, %tmp3
%and = and i32 %tmp4, 4278190335
- store i32 %and, i32 addrspace(1)* %gep, align 4
+ store i32 %and, ptr addrspace(1) %gep, align 4
%v = and i32 %and, 16776964
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
-define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
+define amdgpu_kernel void @known_ffff8004(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff8004:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
@@ -378,17 +378,17 @@ define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32
; GCN-NEXT: s_endpgm
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
- %load = load i32, i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
+ %load = load i32, ptr addrspace(1) %gep, align 4
%mask1 = or i32 %arg1, 4
%mask2 = or i32 %load, 32768 ; 0x8000
%and = and i32 %mask1, 16711935 ; 0x00ff00ff
%tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
%tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
%tmp3 = or i32 %tmp2, %and
- store i32 %tmp3, i32 addrspace(1)* %gep, align 4
+ store i32 %tmp3, ptr addrspace(1) %gep, align 4
%v = and i32 %tmp3, 4294934532 ; 0xffff8004
- store i32 %v, i32 addrspace(1)* %arg, align 4
+ store i32 %v, ptr addrspace(1) %arg, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 7104f1dac006..885dfdedfcea 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -2,7 +2,7 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Extract the high bit of the 1st quarter
-define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_31_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -24,17 +24,17 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 31
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 2nd quarter
-define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_63_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -55,17 +55,17 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 63
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 3rd quarter
-define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_95_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_95_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -88,17 +88,17 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 95
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Extract the high bit of the 4th quarter
-define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_127_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_127_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -119,17 +119,17 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128
; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 127
%bit = and i128 %srl, 1
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
; Spans more than 2 dword boundaries
-define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GCN-LABEL: v_uextract_bit_34_100_i128:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -150,12 +150,12 @@ define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i
; GCN-NEXT: buffer_store_dwordx4 v[4:7], v[8:9], s[0:3], 0 addr64
; GCN-NEXT: s_endpgm
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i128, i128 addrspace(1)* %in.gep
+ %in.gep = getelementptr i128, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i128, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i128, ptr addrspace(1) %in.gep
%srl = lshr i128 %ld.64, 34
%bit = and i128 %srl, 73786976294838206463
- store i128 %bit, i128 addrspace(1)* %out.gep
+ store i128 %bit, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index 378212dc326f..42bfbbe19dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -9,14 +9,14 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -27,14 +27,14 @@ define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 63
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -43,14 +43,14 @@ define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -59,14 +59,14 @@ define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addr
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -76,14 +76,14 @@ define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 32
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -93,14 +93,14 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 1
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -109,14 +109,14 @@ define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 add
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_20_21_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -125,14 +125,14 @@ define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_30_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -141,14 +141,14 @@ define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 a
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHIFT]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_1_31_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 1
%bit = and i64 %srl, 2147483647
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -160,14 +160,14 @@ define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 a
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workgroup.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -177,14 +177,14 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_32_33_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 3
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -195,14 +195,14 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_30_60_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 30
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -212,14 +212,14 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO1]]]
-define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 1073741823
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -228,14 +228,14 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64
; GCN: buffer_load_dwordx2 v[[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]]
; GCN: v_alignbit_b32 v[[SHRLO:[0-9]+]], v[[VALHI]], v[[VALLO]], 31
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%and = and i64 %srl, 4294967295
- store i64 %and, i64 addrspace(1)* %out
+ store i64 %and, ptr addrspace(1) %out
ret void
}
@@ -244,15 +244,15 @@ define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
-define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -260,15 +260,15 @@ define amdgpu_kernel void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %ou
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 3
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -276,15 +276,15 @@ define amdgpu_kernel void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
-define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 1
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -294,15 +294,15 @@ define amdgpu_kernel void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %ou
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
-define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 31
%trunc = trunc i64 %srl to i32
%bit = and i32 %trunc, 3
- store i32 %bit, i32 addrspace(1)* %out.gep
+ store i32 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -315,14 +315,14 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)*
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
-define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @and_not_mask_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 20
%bit = and i64 %srl, 4
- store i64 %bit, i64 addrspace(1)* %out.gep
+ store i64 %bit, ptr addrspace(1) %out.gep
ret void
}
@@ -336,15 +336,15 @@ define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspac
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[SHRLO]]:[[SHRHI]]]
; GCN: buffer_store_dwordx2 v[[[AND]]:[[ZERO]]]
-define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 27
%bit = and i64 %srl, 3
- store volatile i64 %srl, i64 addrspace(1)* %out
- store volatile i64 %bit, i64 addrspace(1)* %out
+ store volatile i64 %srl, ptr addrspace(1) %out
+ store volatile i64 %bit, ptr addrspace(1) %out
ret void
}
@@ -356,15 +356,15 @@ define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspac
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: buffer_store_dwordx2 v[[[SHR]]:[[ZERO_SHR]]]
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO_BFE]]]
-define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 34
%bit = and i64 %srl, 7
- store volatile i64 %srl, i64 addrspace(1)* %out
- store volatile i64 %bit, i64 addrspace(1)* %out
+ store volatile i64 %srl, ptr addrspace(1) %out
+ store volatile i64 %bit, ptr addrspace(1) %out
ret void
}
@@ -374,19 +374,19 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspac
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[BFE]]:{{[0-9]+\]}}
; GCN: buffer_store_dword v[[ZERO]]
-define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #1 {
%id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
- %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
- %out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
- %out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
- %ld.64 = load i64, i64 addrspace(1)* %in.gep
+ %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %id.x
+ %out0.gep = getelementptr i64, ptr addrspace(1) %out0, i32 %id.x
+ %out1.gep = getelementptr i32, ptr addrspace(1) %out1, i32 %id.x
+ %ld.64 = load i64, ptr addrspace(1) %in.gep
%srl = lshr i64 %ld.64, 33
%bit = and i64 %srl, 7
- store volatile i64 %bit, i64 addrspace(1)* %out0.gep
+ store volatile i64 %bit, ptr addrspace(1) %out0.gep
%srl.srl32 = lshr i64 %srl, 32
%srl.hi = trunc i64 %srl.srl32 to i32
- store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
+ store volatile i32 %srl.hi, ptr addrspace(1) %out1.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index 403e553f8bec..da1faae414ce 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -209,7 +209,7 @@ define amdgpu_kernel void @s_shl_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = shl i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -242,7 +242,7 @@ define amdgpu_kernel void @s_lshr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = lshr i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -276,7 +276,7 @@ define amdgpu_kernel void @s_ashr_i128_ss(i128 %lhs, i128 %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = ashr i128 %lhs, %rhs
- store i128 %shift, i128 addrspace(1)* null
+ store i128 %shift, ptr addrspace(1) null
ret void
}
@@ -497,7 +497,7 @@ define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = shl <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
@@ -569,7 +569,7 @@ define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = lshr <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
@@ -643,7 +643,7 @@ define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
%shift = ashr <2 x i128> %lhs, %rhs
- store <2 x i128> %shift, <2 x i128> addrspace(1)* null
+ store <2 x i128> %shift, ptr addrspace(1) null
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
index 1c42fe4711ed..8b0ffcf83f71 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -8,10 +8,10 @@
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 35
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -20,10 +20,10 @@ define amdgpu_kernel void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -32,10 +32,10 @@ define amdgpu_kernel void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_33(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 33
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -43,10 +43,10 @@ define amdgpu_kernel void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_i64_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = lshr i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -58,11 +58,11 @@ define amdgpu_kernel void @lshr_i64_32(i64 addrspace(1)* %out, i64 addrspace(1)*
; GCN-DAG: buffer_load_dword v[[LO:[0-9]+]]
; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[LO]], 8, 23
; GCN: buffer_store_dwordx2 v[[[BFE]]:[[ZERO]]]
-define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @lshr_and_i64_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%and = and i64 %val, 9223372036854775807 ; 0x7fffffffffffffff
%shl = lshr i64 %and, 40
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -73,10 +73,10 @@ define amdgpu_kernel void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 3, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_35(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 35
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -84,10 +84,10 @@ define amdgpu_kernel void @shl_i64_const_35(i64 addrspace(1)* %out, i64 addrspac
; GCN-DAG: buffer_load_dword v[[HI:[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -96,28 +96,28 @@ define amdgpu_kernel void @shl_i64_const_32(i64 addrspace(1)* %out, i64 addrspac
; GCN: v_lshlrev_b32_e32 v[[HI:[0-9]+]], 31, [[VAL]]
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @shl_i64_const_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
; ashr (i64 x), 63 => (ashr lo(x), 31), lo(x)
; GCN-LABEL: {{^}}ashr_i64_const_32:
-define amdgpu_kernel void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @ashr_i64_const_32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = ashr i64 %val, 32
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}ashr_i64_const_63:
-define amdgpu_kernel void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @ashr_i64_const_63(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = ashr i64 %val, 63
- store i64 %shl, i64 addrspace(1)* %out
+ store i64 %shl, ptr addrspace(1) %out
ret void
}
@@ -125,11 +125,11 @@ define amdgpu_kernel void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspa
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_31_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -137,11 +137,11 @@ define amdgpu_kernel void @trunc_shl_31_i32_i64(i32 addrspace(1)* %out, i64 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define amdgpu_kernel void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_15_i16_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 15
%trunc = trunc i64 %shl to i16
- store i16 %trunc, i16 addrspace(1)* %out
+ store i16 %trunc, ptr addrspace(1) %out
ret void
}
@@ -149,11 +149,11 @@ define amdgpu_kernel void @trunc_shl_15_i16_i64(i16 addrspace(1)* %out, i64 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 15, [[VAL]]
; GCN: buffer_store_short [[SHL]]
-define amdgpu_kernel void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32, i32 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_15_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i32, ptr addrspace(1) %in
%shl = shl i32 %val, 15
%trunc = trunc i32 %shl to i16
- store i16 %trunc, i16 addrspace(1)* %out
+ store i16 %trunc, ptr addrspace(1) %out
ret void
}
@@ -161,11 +161,11 @@ define amdgpu_kernel void @trunc_shl_15_i16_i32(i16 addrspace(1)* %out, i32 addr
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 7, [[VAL]]
; GCN: buffer_store_byte [[SHL]]
-define amdgpu_kernel void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_7_i8_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 7
%trunc = trunc i64 %shl to i8
- store i8 %trunc, i8 addrspace(1)* %out
+ store i8 %trunc, ptr addrspace(1) %out
ret void
}
@@ -174,11 +174,11 @@ define amdgpu_kernel void @trunc_shl_7_i8_i64(i8 addrspace(1)* %out, i64 addrspa
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 2, [[SHL]]
; GCN: buffer_store_byte [[AND]]
-define amdgpu_kernel void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_1_i2_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i2
- store i2 %trunc, i2 addrspace(1)* %out
+ store i2 %trunc, ptr addrspace(1) %out
ret void
}
@@ -186,11 +186,11 @@ define amdgpu_kernel void @trunc_shl_1_i2_i64(i2 addrspace(1)* %out, i64 addrspa
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 1, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_1_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 1
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -198,22 +198,22 @@ define amdgpu_kernel void @trunc_shl_1_i32_i64(i32 addrspace(1)* %out, i64 addrs
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, [[VAL]]
; GCN: buffer_store_dword [[SHL]]
-define amdgpu_kernel void @trunc_shl_16_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_16_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 16
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}trunc_shl_33_i32_i64:
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[ZERO]]
-define amdgpu_kernel void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_33_i32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 33
%trunc = trunc i64 %shl to i32
- store i32 %trunc, i32 addrspace(1)* %out
+ store i32 %trunc, ptr addrspace(1) %out
ret void
}
@@ -222,11 +222,11 @@ define amdgpu_kernel void @trunc_shl_33_i32_i64(i32 addrspace(1)* %out, i64 addr
; GCN-DAG: v_lshlrev_b32_e32 v[[RESHI:[0-9]+]], 16, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 v[[RESLO:[0-9]+]], 16, v[[LO]]
; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
-define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %val = load <2 x i64>, <2 x i64> addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load <2 x i64>, ptr addrspace(1) %in
%shl = shl <2 x i64> %val, <i64 16, i64 16>
%trunc = trunc <2 x i64> %shl to <2 x i32>
- store <2 x i32> %trunc, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %trunc, ptr addrspace(1) %out
ret void
}
@@ -235,12 +235,12 @@ define amdgpu_kernel void @trunc_shl_16_v2i32_v2i64(<2 x i32> addrspace(1)* %out
; GCN: v_lshl_b64 v[[[RESLO:[0-9]+]]:[[RESHI:[0-9]+]]], [[VAL]], 31
; GCN: buffer_store_dword v[[RESLO]]
; GCN: buffer_store_dwordx2 v[[[RESLO]]:[[RESHI]]]
-define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %val = load i64, i64 addrspace(1)* %in
+define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+ %val = load i64, ptr addrspace(1) %in
%shl = shl i64 %val, 31
%trunc = trunc i64 %shl to i32
- store volatile i32 %trunc, i32 addrspace(1)* %out
- store volatile i64 %shl, i64 addrspace(1)* %in
+ store volatile i32 %trunc, ptr addrspace(1) %out
+ store volatile i64 %shl, ptr addrspace(1) %in
ret void
}
@@ -248,14 +248,14 @@ define amdgpu_kernel void @trunc_shl_31_i32_i64_multi_use(i32 addrspace(1)* %out
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-NOT: v_lshl_b64
; GCN-NOT: v_lshlrev_b64
-define amdgpu_kernel void @trunc_shl_and31(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_and31(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 31
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -264,41 +264,41 @@ bb:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, s[[AMT]], v{{[0-9]+}}
; GCN-NOT: v_lshl_b64
; GCN-NOT: v_lshlrev_b64
-define amdgpu_kernel void @trunc_shl_and30(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_and30(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 30
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
; GCN-LABEL: {{^}}trunc_shl_wrong_and63:
; Negative test, wrong constant
; GCN: v_lshl_b64
-define amdgpu_kernel void @trunc_shl_wrong_and63(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_wrong_and63(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp3 = and i32 %arg2, 63
%tmp4 = zext i32 %tmp3 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
; GCN-LABEL: {{^}}trunc_shl_no_and:
; Negative test, shift can be full 64 bit
; GCN: v_lshl_b64
-define amdgpu_kernel void @trunc_shl_no_and(i64 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) {
+define amdgpu_kernel void @trunc_shl_no_and(ptr addrspace(1) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i32 %arg2) {
bb:
- %tmp = load i64, i64 addrspace(1)* %arg, align 8
+ %tmp = load i64, ptr addrspace(1) %arg, align 8
%tmp4 = zext i32 %arg2 to i64
%tmp5 = shl i64 %tmp, %tmp4
%tmp6 = trunc i64 %tmp5 to i32
- store i32 %tmp6, i32 addrspace(1)* %arg1, align 4
+ store i32 %tmp6, ptr addrspace(1) %arg1, align 4
ret void
}
@@ -307,10 +307,10 @@ bb:
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 4
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 5
; GCN-DAG: v_lshl_b64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 6
-define amdgpu_kernel void @trunc_shl_vec_vec(<4 x i64> addrspace(1)* %arg) {
+define amdgpu_kernel void @trunc_shl_vec_vec(ptr addrspace(1) %arg) {
bb:
- %v = load <4 x i64>, <4 x i64> addrspace(1)* %arg, align 32
+ %v = load <4 x i64>, ptr addrspace(1) %arg, align 32
%shl = shl <4 x i64> %v, <i64 3, i64 4, i64 5, i64 6>
- store <4 x i64> %shl, <4 x i64> addrspace(1)* %arg, align 32
+ store <4 x i64> %shl, ptr addrspace(1) %arg, align 32
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
index 845b88b86e3d..351593a0826f 100644
--- a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
@@ -9,14 +9,14 @@
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]]
; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
-define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @add_const_offset(ptr addrspace(1) nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%add = add i32 %id, 200
%shl = shl i32 %add, 2
- %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %shl
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
@@ -26,14 +26,14 @@ bb:
; CHECK-NOT: v_lshl
; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]]
; CHECK: load_dword v{{[0-9]+}}, v[[[ADDRLO]]:
-define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @or_const_offset(ptr addrspace(1) nocapture %arg) {
bb:
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%add = or i32 %id, 256
%shl = shl i32 %add, 2
- %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl
- %val = load i32, i32 addrspace(1)* %ptr, align 4
- store i32 %val, i32 addrspace(1)* %arg, align 4
+ %ptr = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %shl
+ %val = load i32, ptr addrspace(1) %ptr, align 4
+ store i32 %val, ptr addrspace(1) %arg, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 8e59750efa78..7e6da2c321f7 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.x() #0
-define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -59,15 +59,15 @@ define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add
; EG-NEXT: LSHL T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = shl <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -129,15 +129,15 @@ define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add
; EG-NEXT: LSHL T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = shl <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -203,15 +203,15 @@ define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
- %a = load i16, i16 addrspace(1)* %in
- %b = load i16, i16 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1
+ %a = load i16, ptr addrspace(1) %in
+ %b = load i16, ptr addrspace(1) %b_ptr
%result = shl i16 %a, %b
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_s(ptr addrspace(1) %out, ptr addrspace(1) %in, i16 %b) {
; SI-LABEL: shl_i16_v_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -278,13 +278,13 @@ define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)*
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i16, i16 addrspace(1)* %in
+ %a = load i16, ptr addrspace(1) %in
%result = shl i16 %a, %b
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
+define amdgpu_kernel void @shl_i16_v_compute_s(ptr addrspace(1) %out, ptr addrspace(1) %in, i16 %b) {
; SI-LABEL: shl_i16_v_compute_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -360,14 +360,14 @@ define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrs
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i16, i16 addrspace(1)* %in
+ %a = load i16, ptr addrspace(1) %in
%b.add = add i16 %b, 3
%result = shl i16 %a, %b.add
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i16_computed_amount(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i16_computed_amount:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -458,18 +458,18 @@ define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 a
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
- %a = load volatile i16, i16 addrspace(1)* %in
- %b = load volatile i16, i16 addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr i16, ptr addrspace(1) %gep, i16 1
+ %a = load volatile i16, ptr addrspace(1) %in
+ %b = load volatile i16, ptr addrspace(1) %b_ptr
%b.add = add i16 %b, 3
%result = shl i16 %a, %b.add
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
+define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; SI-LABEL: shl_i16_i_s:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -523,11 +523,11 @@ define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = shl i16 %a, 12
- store i16 %result, i16 addrspace(1)* %out
+ store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -617,17 +617,17 @@ define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <2 x i16>, ptr addrspace(1) %in
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = shl <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -747,17 +747,17 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
+ %a = load <4 x i16>, ptr addrspace(1) %gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = shl <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %gep.out
+ store <4 x i16> %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @shl_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -816,15 +816,15 @@ define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = shl i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -899,15 +899,15 @@ define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add
; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <2 x i64>, ptr addrspace(1) %in
+ %b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = shl <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @shl_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: shl_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1023,16 +1023,16 @@ define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = shl <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
; Make sure load width gets reduced to i32 load.
-define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @s_shl_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: s_shl_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0x13
@@ -1069,11 +1069,11 @@ define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = shl i64 %a, 32
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_shl_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_shl_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -1132,15 +1132,15 @@ define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)
; EG-NEXT: MOV * T1.Y, T0.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = shl i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+define amdgpu_kernel void @s_shl_constant_i64(ptr addrspace(1) %out, i64 %a) {
; SI-LABEL: s_shl_constant_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1194,11 +1194,11 @@ define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 281474976710655, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_constant_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_constant_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1263,13 +1263,13 @@ define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrsp
; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 1231231234567, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_i64_32_bit_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_i64_32_bit_constant:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1331,13 +1331,13 @@ define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 1234567, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+define amdgpu_kernel void @v_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) {
; SI-LABEL: v_shl_inline_imm_64_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1394,13 +1394,13 @@ define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %a = load i64, ptr addrspace(1) %aptr, align 8
%shl = shl i64 64, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_64_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1446,11 +1446,11 @@ define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 a
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 64, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_1_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1497,11 +1497,11 @@ define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 ad
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 1, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_1_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1544,11 +1544,11 @@ define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4607182418800017408, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_1_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1591,11 +1591,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13830554455654793216, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_0_5_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1638,11 +1638,11 @@ define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4602678819172646912, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_0_5_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1685,11 +1685,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13826050856027422720, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_2_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1732,11 +1732,11 @@ define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4611686018427387904, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_2_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1779,11 +1779,11 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13835058055282163712, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1826,11 +1826,11 @@ define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4616189618054758400, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xd
@@ -1873,14 +1873,14 @@ define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13839561654909534208, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
; Test with the 64-bit integer bitpattern for a 32-bit float in the
; low 32-bits, which is not a valid 64-bit inline immediate.
-define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_f32_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1929,12 +1929,12 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 1082130432, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
; FIXME: Copy of -1 register
-define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -1986,11 +1986,11 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %o
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 -1065353216, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_high_imm_f32_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -2037,11 +2037,11 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 4647714815446351872, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
-define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, i64 %a) {
; SI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -2088,7 +2088,7 @@ define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%shl = shl i64 13871086852301127680, %a
- store i64 %shl, i64 addrspace(1)* %out, align 8
+ store i64 %shl, ptr addrspace(1) %out, align 8
ret void
}
@@ -2128,11 +2128,11 @@ define amdgpu_kernel void @test_mul2(i32 %p) {
; EG-NEXT: LSHL * T1.X, KC0[2].Y, 1,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
%i = mul i32 %p, 2
- store volatile i32 %i, i32 addrspace(1)* undef
+ store volatile i32 %i, ptr addrspace(1) undef
ret void
}
-define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) {
+define void @shl_or_k(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: shl_or_k:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2169,11 +2169,11 @@ define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) {
; EG-NEXT: 4(5.605194e-45), 2(2.802597e-45)
%tmp0 = or i32 %in, 1
%tmp2 = shl i32 %tmp0, 2
- store i32 %tmp2, i32 addrspace(1)* %out
+ store i32 %tmp2, ptr addrspace(1) %out
ret void
}
-define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) {
+define void @shl_or_k_two_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %in) {
; SI-LABEL: shl_or_k_two_uses:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2213,8 +2213,8 @@ define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tmp0 = or i32 %in, 1
%tmp2 = shl i32 %tmp0, 2
- store i32 %tmp2, i32 addrspace(1)* %out0
- store i32 %tmp0, i32 addrspace(1)* %out1
+ store i32 %tmp2, ptr addrspace(1) %out0
+ store i32 %tmp0, ptr addrspace(1) %out1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index 8ea1879919b3..ff66b7d99686 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -5,7 +5,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
+define amdgpu_kernel void @s_shl_v2i16(ptr addrspace(1) %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
; GFX9-LABEL: s_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -82,11 +82,11 @@ define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%result = shl <2 x i16> %lhs, %rhs
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -163,17 +163,17 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <2 x i16>, ptr addrspace(1) %in.gep
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = shl <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_v_s_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: shl_v_s_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -257,15 +257,15 @@ define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %vgpr, %sgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 {
+define amdgpu_kernel void @shl_s_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in, <2 x i16> %sgpr) #0 {
; GFX9-LABEL: shl_s_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
@@ -349,15 +349,15 @@ define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %sgpr, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_imm_v_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_imm_v_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -434,15 +434,15 @@ define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> <i16 8, i16 8>, %vgpr
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_v_imm_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -515,15 +515,15 @@ define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <2 x i16>, ptr addrspace(1) %in.gep
%result = shl <2 x i16> %vgpr, <i16 8, i16 8>
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out.gep
+ store <2 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: v_shl_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -613,17 +613,17 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in.gep, i32 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in.gep, i32 1
+ %a = load <4 x i16>, ptr addrspace(1) %in.gep
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = shl <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
-define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX9-LABEL: shl_v_imm_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
@@ -709,11 +709,11 @@ define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i1
; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
- %in.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i64 %tid.ext
- %out.gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i64 %tid.ext
- %vgpr = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep
+ %in.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i64 %tid.ext
+ %out.gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i64 %tid.ext
+ %vgpr = load <4 x i16>, ptr addrspace(1) %in.gep
%result = shl <4 x i16> %vgpr, <i16 8, i16 8, i16 8, i16 8>
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out.gep
+ store <4 x i16> %result, ptr addrspace(1) %out.gep
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
index 80755a01f425..3cf3a3925bb1 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -9,13 +9,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -25,14 +25,14 @@ define amdgpu_kernel void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
- store i32 %result, i32 addrspace(1)* %out0, align 4
- store i32 %add, i32 addrspace(1)* %out1, align 4
+ store i32 %result, ptr addrspace(1) %out0, align 4
+ store i32 %add, ptr addrspace(1) %out1, align 4
ret void
}
@@ -43,13 +43,13 @@ define amdgpu_kernel void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
-define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @shl_2_add_999_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
- %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32, i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, ptr addrspace(1) %in, i32 %tid.x
+ %val = load i32, ptr addrspace(1) %ptr, align 4
%shl = add i32 %val, 999
%result = shl i32 %shl, 2
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
@@ -60,11 +60,11 @@ define amdgpu_kernel void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspa
; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant(i32 addrspace(1)* %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
- store i32 %add.1, i32 addrspace(1)* %out, align 4
+ store i32 %add.1, ptr addrspace(1) %out, align 4
ret void
}
@@ -76,11 +76,11 @@ define amdgpu_kernel void @test_add_shl_add_constant(i32 addrspace(1)* %out, [8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
-define amdgpu_kernel void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, [8 x i32], i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @test_add_shl_add_constant_inv(ptr addrspace(1) %out, [8 x i32], i32 %x, i32 %y) #0 {
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %y, %shl
- store i32 %add.1, i32 addrspace(1)* %out, align 4
+ store i32 %add.1, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 30cecc085776..3c9a05af3c59 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -19,13 +19,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
+ store float %val0, ptr addrspace(1) %out
ret void
}
@@ -39,14 +39,14 @@ define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 add
; GCN-DAG: buffer_store_dword [[RESULT]]
; GCN-DAG: buffer_store_dword [[ADDUSE]]
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_1(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
%shl_add_use = shl i32 %idx.0, 2
- store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
- store float %val0, float addrspace(1)* %out
+ store i32 %shl_add_use, ptr addrspace(1) %add_use, align 4
+ store float %val0, ptr addrspace(1) %out
ret void
}
@@ -55,13 +55,13 @@ define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 add
; GCN-LABEL: {{^}}load_shl_base_lds_max_offset
; GCN: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @load_shl_base_lds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) %lds, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
- %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
- %val0 = load i8, i8 addrspace(3)* %arrayidx0
- store i32 %idx.0, i32 addrspace(1)* %add_use
- store i8 %val0, i8 addrspace(1)* %out
+ %arrayidx0 = getelementptr inbounds [65536 x i8], ptr addrspace(3) @maxlds, i32 0, i32 %idx.0
+ %val0 = load i8, ptr addrspace(3) %arrayidx0
+ store i32 %idx.0, ptr addrspace(1) %add_use
+ store i8 %val0, ptr addrspace(1) %out
ret void
}
@@ -73,15 +73,15 @@ define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i
; GCN: s_mov_b32 m0, -1
; GCN: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; GCN: s_endpgm
-define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @load_shl_base_lds_2(ptr addrspace(1) %out) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float, float addrspace(3)* %arrayidx0, align 4
- %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
- %val1 = load float, float addrspace(3)* %arrayidx1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
+ %val1 = load float, ptr addrspace(3) %arrayidx1, align 4
%sum = fadd float %val0, %val1
- store float %sum, float addrspace(1)* %out, align 4
+ store float %sum, ptr addrspace(1) %out, align 4
ret void
}
@@ -89,12 +89,12 @@ define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @store_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- store float 1.0, float addrspace(3)* %arrayidx0, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+ store float 1.0, ptr addrspace(3) %arrayidx0, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -104,13 +104,13 @@ define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 ad
@lds2 = addrspace(3) global [512 x i32] undef, align 4
-; define amdgpu_kernel void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_load_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+; %val = load atomic i32, ptr addrspace(3) %arrayidx0 seq_cst, align 4
+; store i32 %val, ptr addrspace(1) %out, align 4
+; store i32 %idx.0, ptr addrspace(1) %add_use, align 4
; ret void
; }
@@ -119,14 +119,14 @@ define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 ad
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
+define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %pair = cmpxchg ptr addrspace(3) %arrayidx0, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
- store i32 %result, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -134,13 +134,13 @@ define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out,
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_swap_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xchg ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -148,13 +148,13 @@ define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i3
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_add_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw add ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -162,13 +162,13 @@ define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_sub_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw sub ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -176,13 +176,13 @@ define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_and_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw and ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -190,13 +190,13 @@ define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_or_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw or ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -204,23 +204,23 @@ define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_xor_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw xor ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
-; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
-; store i32 %val, i32 addrspace(1)* %out, align 4
-; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+; %val = atomicrmw nand ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+; store i32 %val, ptr addrspace(1) %out, align 4
+; store i32 %idx.0, ptr addrspace(1) %add_use, align 4
; ret void
; }
@@ -228,13 +228,13 @@ define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_min_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw min ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -242,13 +242,13 @@ define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_max_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw max ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -256,13 +256,13 @@ define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umin_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umin ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -270,13 +270,13 @@ define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i3
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; GCN: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; GCN: s_endpgm
-define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_umax_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
- %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
- store i32 %val, i32 addrspace(1)* %out, align 4
- store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds2, i32 0, i32 %idx.0
+ %val = atomicrmw umax ptr addrspace(3) %arrayidx0, i32 3 seq_cst
+ store i32 %val, ptr addrspace(1) %out, align 4
+ store i32 %idx.0, ptr addrspace(1) %add_use, align 4
ret void
}
@@ -289,10 +289,10 @@ define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -306,10 +306,10 @@ define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 8191
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -323,10 +323,10 @@ define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
%idx.add = add nuw i32 %idx, 4096
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -340,10 +340,10 @@ define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
%idx.add = add nuw i32 %idx, 4
%shl0 = shl i32 %idx.add, 2
%shl1 = shl i32 %idx.add, 3
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
@@ -358,10 +358,10 @@ define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #
%idx.add = add nuw i32 %idx, 511
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
@@ -375,10 +375,10 @@ define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.a
%idx.add = add nuw i32 %idx, 256
%shl0 = shl i32 %idx.add, 4
%shl1 = shl i32 %idx.add, 5
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
- store volatile i32 9, i32 addrspace(5)* %ptr0
- store volatile i32 10, i32 addrspace(5)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(5)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(5)
+ store volatile i32 9, ptr addrspace(5) %ptr0
+ store volatile i32 10, ptr addrspace(5) %ptr1
ret void
}
@@ -393,10 +393,10 @@ define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
%idx.add = or i32 %idx, 4
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
@@ -410,10 +410,10 @@ define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
%idx.add = or i32 %idx, 8191
%shl0 = shl i32 %idx.add, 3
%shl1 = shl i32 %idx.add, 4
- %ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
- %ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
- store volatile i32 9, i32 addrspace(3)* %ptr0
- store volatile i32 10, i32 addrspace(3)* %ptr1
+ %ptr0 = inttoptr i32 %shl0 to ptr addrspace(3)
+ %ptr1 = inttoptr i32 %shl1 to ptr addrspace(3)
+ store volatile i32 9, ptr addrspace(3) %ptr0
+ store volatile i32 10, ptr addrspace(3) %ptr1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
index 66b7b342b31d..c04cb89e9527 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
@@ -7,16 +7,16 @@
; GCN: v_add_co_ci_u32_e32 v[[EXTRA_HI:[0-9]+]], vcc_lo, 0, v5, vcc_lo
; GCN: global_atomic_csub v{{[0-9]+}}, v[[[LO]]:[[HI]]], [[K]], off offset:512 glc
; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to i32 addrspace(1)*
- %val = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %castback, i32 43)
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ %val = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %castback, i32 43)
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret i32 %val
}
-declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #0
+declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
attributes #0 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
index d4709f2f7c18..30e799229359 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
@@ -7,13 +7,13 @@
; GCN-DAG: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN-DAG: global_atomic_and v[[[LO]]:[[HI]]], [[THREE]], off offset:512
; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define void @shl_base_atomicrmw_global_ptr(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define void @shl_base_atomicrmw_global_ptr(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to i32 addrspace(1)*
- %val = atomicrmw and i32 addrspace(1)* %castback, i32 3 seq_cst
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ %val = atomicrmw and ptr addrspace(1) %castback, i32 3 seq_cst
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret void
}
@@ -24,17 +24,17 @@ define void @shl_base_atomicrmw_global_ptr(i32 addrspace(1)* %out, i64 addrspace
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x42c80000
; GCN-DAG: global_atomic_add_f32 v[[[LO]]:[[HI]]], [[K]], off offset:512
; GCN-DAG: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[EXTRA_LO]]:[[EXTRA_HI]]]
-define void @shl_base_global_ptr_global_atomic_fadd(i32 addrspace(1)* %out, i64 addrspace(1)* %extra.use, [512 x i32] addrspace(1)* %ptr) #0 {
- %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(1)* %ptr, i64 0, i64 32
- %cast = ptrtoint i32 addrspace(1)* %arrayidx0 to i64
+define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) #0 {
+ %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+ %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
%shl = shl i64 %cast, 2
- %castback = inttoptr i64 %shl to float addrspace(1)*
- call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %castback, float 100.0)
- store volatile i64 %cast, i64 addrspace(1)* %extra.use, align 4
+ %castback = inttoptr i64 %shl to ptr addrspace(1)
+ call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %castback, float 100.0)
+ store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
ret void
}
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #1
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 2d43c58aa38a..44881e8345e5 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
-define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -61,15 +61,15 @@ define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
; EG-NEXT: ASHR T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -134,17 +134,17 @@ define amdgpu_kernel void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
; EG-NEXT: ASHR T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
; FIXME: The ashr operation is uniform, but because its operands come from a
; global load we end up with the vector instructions rather than scalar.
-define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -230,17 +230,17 @@ define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ad
; EG-NEXT: OR_INT T6.X, PS, PV.W,
; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1
- %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
- %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %in, i16 1
+ %a = load <2 x i16>, ptr addrspace(1) %in
+ %b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i16> %a, %b
- store <2 x i16> %result, <2 x i16> addrspace(1)* %out
+ store <2 x i16> %result, ptr addrspace(1) %out
ret void
}
; FIXME: The ashr operation is uniform, but because its operands come from a
; global load we end up with the vector instructions rather than scalar.
-define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -393,15 +393,15 @@ define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ad
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: MOV T7.X, PV.Y,
; EG-NEXT: MOV * T10.X, T6.X,
- %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1
- %a = load <4 x i16>, <4 x i16> addrspace(1)* %in
- %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
+ %a = load <4 x i16>, ptr addrspace(1) %in
+ %b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i16> %a, %b
- store <4 x i16> %result, <4 x i16> addrspace(1)* %out
+ store <4 x i16> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_ashr_i64(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_ashr_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
@@ -445,11 +445,11 @@ define amdgpu_kernel void @s_ashr_i64(i64 addrspace(1)* %out, i32 %in) {
entry:
%in.ext = sext i32 %in to i64
%ashr = ashr i64 %in.ext, 8
- store i64 %ashr, i64 addrspace(1)* %out
+ store i64 %ashr, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_i64_2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_i64_2:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -511,15 +511,15 @@ define amdgpu_kernel void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)*
; EG-NEXT: 31(4.344025e-44), 2(2.802597e-45)
; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, PV.W,
entry:
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = ashr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -594,16 +594,16 @@ define amdgpu_kernel void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> ad
; EG-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT: CNDE_INT * T0.Y, T2.W, T2.Y, T1.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
- %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <2 x i64>, ptr addrspace(1) %in
+ %b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = ashr <2 x i64> %a, %b
- store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ store <2 x i64> %result, ptr addrspace(1) %out
ret void
}
; FIXME: Broken on r600
-define amdgpu_kernel void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @ashr_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: ashr_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -712,15 +712,15 @@ define amdgpu_kernel void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> ad
; EG-NEXT: LSHR T3.X, KC0[2].Y, literal.x,
; EG-NEXT: CNDE_INT * T2.Y, T4.W, T5.Y, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = ashr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @s_ashr_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[0:1], 0x14
@@ -770,11 +770,11 @@ define amdgpu_kernel void @s_ashr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = ashr i64 %a, 32
%add = add i64 %result, %b
- store i64 %add, i64 addrspace(1)* %out
+ store i64 %add, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_ashr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -831,15 +831,15 @@ define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: ASHR * T0.Y, T0.X, literal.y,
; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = ashr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
-define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
+define amdgpu_kernel void @s_ashr_63_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
; SI-LABEL: s_ashr_63_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[0:1], 0x14
@@ -889,11 +889,11 @@ define amdgpu_kernel void @s_ashr_63_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = ashr i64 %a, 63
%add = add i64 %result, %b
- store i64 %add, i64 addrspace(1)* %out
+ store i64 %add, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_ashr_63_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_ashr_63_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -954,11 +954,11 @@ define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: MOV * T0.Y, PV.X,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = ashr i64 %a, 63
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index 9693c44fac30..a759896ac424 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.amdgcn.workitem.id.x() #0
-define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -53,15 +53,15 @@ define amdgpu_kernel void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i
; EG-NEXT: LSHR T0.X, T0.X, T0.Y,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
- %a = load i32, i32 addrspace(1)* %in
- %b = load i32, i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
+ %a = load i32, ptr addrspace(1) %in
+ %b = load i32, ptr addrspace(1) %b_ptr
%result = lshr i32 %a, %b
- store i32 %result, i32 addrspace(1)* %out
+ store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -113,15 +113,15 @@ define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
; EG-NEXT: LSHR T0.X, T0.X, T0.Z,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
- %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <2 x i32>, ptr addrspace(1) %in
+ %b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = lshr <2 x i32> %a, %b
- store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+ store <2 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -183,15 +183,15 @@ define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
; EG-NEXT: LSHR T0.X, T0.X, T1.X,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
- %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
- %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
+ %a = load <4 x i32>, ptr addrspace(1) %in
+ %b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i32> %a, %b
- store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+ store <4 x i32> %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -247,15 +247,15 @@ define amdgpu_kernel void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %i
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT * T0.Y, T1.W, T1.Z, 0.0,
- %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
- %a = load i64, i64 addrspace(1)* %in
- %b = load i64, i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
+ %a = load i64, ptr addrspace(1) %in
+ %b = load i64, ptr addrspace(1) %b_ptr
%result = lshr i64 %a, %b
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+define amdgpu_kernel void @lshr_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: lshr_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
@@ -358,16 +358,16 @@ define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> ad
; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT * T1.Y, T4.W, T4.Y, 0.0,
- %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
- %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
+ %a = load <4 x i64>, ptr addrspace(1) %in
+ %b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = lshr <4 x i64> %a, %b
- store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
; Make sure load width gets reduced to i32 load.
-define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) {
+define amdgpu_kernel void @s_lshr_32_i64(ptr addrspace(1) %out, [8 x i32], i64 %a) {
; SI-LABEL: s_lshr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0x14
@@ -404,11 +404,11 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, [8 x i32], i64
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%result = lshr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %out
+ store i64 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+define amdgpu_kernel void @v_lshr_32_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: v_lshr_32_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -465,11 +465,11 @@ define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1
; EG-NEXT: LSHR * T1.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
- %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
- %a = load i64, i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
+ %gep.out = getelementptr i64, ptr addrspace(1) %out, i32 %tid
+ %a = load i64, ptr addrspace(1) %gep.in
%result = lshr i64 %a, 32
- store i64 %result, i64 addrspace(1)* %gep.out
+ store i64 %result, ptr addrspace(1) %gep.out
ret void
}
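For reference, the mechanical rewrite applied throughout these hunks is the standard opaque-pointer migration: the pointee type disappears from pointer arguments, while getelementptr, load, and store keep carrying an explicit element or value type. A minimal sketch of the pattern, using a hypothetical kernel rather than one of the tests above:

define amdgpu_kernel void @opaque_ptr_example(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  ; typed form was: %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
  ; typed form was: %a = load i32, i32 addrspace(1)* %b_ptr
  %a = load i32, ptr addrspace(1) %b_ptr
  ; typed form was: store i32 %a, i32 addrspace(1)* %out
  store i32 %a, ptr addrspace(1) %out
  ret void
}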