[llvm] 177ff42 - AMDGPU: Convert some fp op tests to opaque pointers

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 29 15:09:00 PST 2022


Author: Matt Arsenault
Date: 2022-11-29T18:08:53-05:00
New Revision: 177ff42d8efe58f3d365eb9c10a8f261e2531314

URL: https://github.com/llvm/llvm-project/commit/177ff42d8efe58f3d365eb9c10a8f261e2531314
DIFF: https://github.com/llvm/llvm-project/commit/177ff42d8efe58f3d365eb9c10a8f261e2531314.diff

LOG: AMDGPU: Convert some fp op tests to opaque pointers

fmax_legacy.ll had one test that produced "ptraddrspace(1)", since
somehow "i1addrspace(1)*" used to parse.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
    llvm/test/CodeGen/AMDGPU/clamp.ll
    llvm/test/CodeGen/AMDGPU/fabs.f16.ll
    llvm/test/CodeGen/AMDGPU/fabs.f64.ll
    llvm/test/CodeGen/AMDGPU/fabs.ll
    llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
    llvm/test/CodeGen/AMDGPU/fadd.f16.ll
    llvm/test/CodeGen/AMDGPU/fadd.ll
    llvm/test/CodeGen/AMDGPU/fadd64.ll
    llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
    llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
    llvm/test/CodeGen/AMDGPU/fconst64.ll
    llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
    llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
    llvm/test/CodeGen/AMDGPU/fdiv.ll
    llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
    llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
    llvm/test/CodeGen/AMDGPU/ffloor.ll
    llvm/test/CodeGen/AMDGPU/fma-combine.ll
    llvm/test/CodeGen/AMDGPU/fma.f64.ll
    llvm/test/CodeGen/AMDGPU/fma.ll
    llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
    llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
    llvm/test/CodeGen/AMDGPU/fmax3.ll
    llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
    llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
    llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
    llvm/test/CodeGen/AMDGPU/fmaxnum.ll
    llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
    llvm/test/CodeGen/AMDGPU/fmul.f16.ll
    llvm/test/CodeGen/AMDGPU/fmul.ll
    llvm/test/CodeGen/AMDGPU/fmul64.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
    llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
    llvm/test/CodeGen/AMDGPU/fnearbyint.ll
    llvm/test/CodeGen/AMDGPU/fneg-combines.ll
    llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
    llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
    llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
    llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
    llvm/test/CodeGen/AMDGPU/fneg.f16.ll
    llvm/test/CodeGen/AMDGPU/fneg.f64.ll
    llvm/test/CodeGen/AMDGPU/fneg.ll
    llvm/test/CodeGen/AMDGPU/fp-classify.ll
    llvm/test/CodeGen/AMDGPU/fract.f64.ll
    llvm/test/CodeGen/AMDGPU/fract.ll
    llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
    llvm/test/CodeGen/AMDGPU/fsqrt.ll
    llvm/test/CodeGen/AMDGPU/fsub.f16.ll
    llvm/test/CodeGen/AMDGPU/fsub.ll
    llvm/test/CodeGen/AMDGPU/fsub64.ll
    llvm/test/CodeGen/AMDGPU/half.ll
    llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
    llvm/test/CodeGen/AMDGPU/madak.ll
    llvm/test/CodeGen/AMDGPU/madmk.ll
    llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
    llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
    llvm/test/CodeGen/AMDGPU/rsq.ll
    llvm/test/CodeGen/AMDGPU/v_mac.ll
    llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
    llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 487ab48655ec3..cb6ed8d471728 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -6,15 +6,15 @@
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN-NOT: [[A]]
 ; GCN: v_add_f32_e64 v{{[0-9]+}}, [[A]], 1.0 clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 1.0
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
+  store float %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -22,16 +22,16 @@ define amdgpu_kernel void @v_clamp_add_src_f32(float addrspace(1)* %out, float a
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[ADD]], [[ADD]] clamp{{$}}
-define amdgpu_kernel void @v_clamp_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 1.0
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
-  store volatile float %add, float addrspace(1)* undef
+  store float %clamp, ptr addrspace(1) %out.gep
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -39,16 +39,16 @@ define amdgpu_kernel void @v_clamp_multi_use_src_f32(float addrspace(1)* %out, f
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN-NOT: [[A]]
 ; GCN: v_add_f32_e64 v{{[0-9]+}}, [[A]], 1.0 clamp{{$}}
-define amdgpu_kernel void @v_clamp_dbg_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_dbg_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 1.0
   call void @llvm.dbg.value(metadata float %add, i64 0, metadata !4, metadata !9), !dbg !10
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
+  store float %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -56,16 +56,16 @@ define amdgpu_kernel void @v_clamp_dbg_use_src_f32(float addrspace(1)* %out, flo
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_floor_f32_e32 [[FLOOR:v[0-9]+]], [[A]]
 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[FLOOR]], -[[FLOOR]] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_neg_src_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_neg_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %floor = call float @llvm.floor.f32(float %a)
   %neg.floor = fneg float %floor
   %max = call float @llvm.maxnum.f32(float %neg.floor, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
+  store float %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -73,29 +73,29 @@ define amdgpu_kernel void @v_clamp_add_neg_src_f32(float addrspace(1)* %out, flo
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 0, [[ADD]]{{$}}
-define amdgpu_kernel void @v_non_clamp_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_non_clamp_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 1.0
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
-  store float %max, float addrspace(1)* %out.gep
+  store float %max, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_clamp_add_src_f32_denormals:
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_add_f32_e64 [[ADD:v[0-9]+]], [[A]], 1.0 clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_f32_denormals(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_add_src_f32_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 1.0
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
+  store float %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -106,15 +106,15 @@ define amdgpu_kernel void @v_clamp_add_src_f32_denormals(float addrspace(1)* %ou
 ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
 ; SI: v_add_f32_e64 [[ADD:v[0-9]+]], [[CVT]], 1.0 clamp{{$}}
 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[ADD]]
-define amdgpu_kernel void @v_clamp_add_src_f16_denorm(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %add = fadd half %a, 1.0
   %max = call half @llvm.maxnum.f16(half %add, half 0.0)
   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
-  store half %clamp, half addrspace(1)* %out.gep
+  store half %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -126,15 +126,15 @@ define amdgpu_kernel void @v_clamp_add_src_f16_denorm(half addrspace(1)* %out, h
 ; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
 ; SI: v_add_f32_e64 [[ADD:v[0-9]+]], [[CVT]], 1.0 clamp{{$}}
 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[ADD]]
-define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(half addrspace(1)* %out, half addrspace(1)* %aptr) #3 {
+define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %add = fadd half %a, 1.0
   %max = call half @llvm.maxnum.f16(half %add, half 0.0)
   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
-  store half %clamp, half addrspace(1)* %out.gep
+  store half %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -142,47 +142,47 @@ define amdgpu_kernel void @v_clamp_add_src_f16_no_denormals(half addrspace(1)* %
 ; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[A:[0-9]+]]:[[B:[0-9]+]]]
 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[A]], 1.0 clamp{{$}}
 ; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, v[[B]], 1.0 clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %out, i32 %tid
-  %a = load <2 x float>, <2 x float> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x float>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x float>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x float>, ptr addrspace(1) %gep0
   %add = fadd <2 x float> %a, <float 1.0, float 1.0>
   %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %add, <2 x float> zeroinitializer)
   %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
-  store <2 x float> %clamp, <2 x float> addrspace(1)* %out.gep
+  store <2 x float> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_clamp_add_src_f64:
 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 ; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], 1.0 clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %a = load double, double addrspace(1)* %gep0
+  %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %a = load double, ptr addrspace(1) %gep0
   %add = fadd double %a, 1.0
   %max = call double @llvm.maxnum.f64(double %add, double 0.0)
   %clamp = call double @llvm.minnum.f64(double %max, double 1.0)
-  store double %clamp, double addrspace(1)* %out.gep
+  store double %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_clamp_mac_to_mad:
 ; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]}} clamp{{$}}
-define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float addrspace(1)* %aptr, float %a) #0 {
+define amdgpu_kernel void @v_clamp_mac_to_mad(ptr addrspace(1) %out, ptr addrspace(1) %aptr, float %a) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %b = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %b = load float, ptr addrspace(1) %gep0
 
   %mul = fmul float %a, %a
   %add = fadd float %mul, %b
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
   %res = fadd float %clamp, %b
-  store float %res, float addrspace(1)* %out.gep
+  store float %res, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -190,30 +190,30 @@ define amdgpu_kernel void @v_clamp_mac_to_mad(float addrspace(1)* %out, float ad
 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_denorm:
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %add, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_clamp_add_src_v2f16_no_denormals:
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #3 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %add, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -221,16 +221,16 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_no_denormals(<2 x half> addrspa
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %neg.add = fsub <2 x half> <half -0.0, half -0.0>, %add
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.add, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -238,18 +238,18 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg(<2 x half> addrspace
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_lo:[1,1] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %lo = extractelement <2 x half> %add, i32 0
   %neg.lo = fsub half -0.0, %lo
   %neg.lo.add = insertelement <2 x half> %add, half %neg.lo, i32 0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.add, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -257,18 +257,18 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_lo(<2 x half> addrsp
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] neg_hi:[1,1] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %hi = extractelement <2 x half> %add, i32 1
   %neg.hi = fsub half -0.0, %hi
   %neg.hi.add = insertelement <2 x half> %add, half %neg.hi, i32 1
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.add, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -276,17 +276,17 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_neg_hi(<2 x half> addrsp
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
 ; GFX9: v_pk_max_f16 [[MAX:v[0-9]+]], [[ADD]], [[ADD]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
-define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32> <i32 1, i32 0>
 
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -294,17 +294,17 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(<2 x half> addrspac
 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
-define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %bc = bitcast <2 x half> %a to float
   %f32.op = fadd float %bc, 1.0
   %f32.op.cast = bitcast float %f32.op to <2 x half>
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %f32.op.cast, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -312,16 +312,16 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f32_src(<2 x half> addrspace
 ; GCN:  {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[A]], 1.0 op_sel_hi:[1,0]{{$}}
 ; GFX9: v_max_f32_e64 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
-define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(float addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %add = fadd <2 x half> %a, <half 1.0, half 1.0>
   %bc.add = bitcast <2 x half> %add to float
   %max = call float @llvm.maxnum.f32(float %bc.add, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
-  store float %clamp, float addrspace(1)* %out.gep
+  store float %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -331,18 +331,18 @@ define amdgpu_kernel void @v_no_clamp_add_packed_src_f32(float addrspace(1)* %ou
 ; GCN-DAG: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
 ; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0, [[A]]{{$}}
 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ADD]], [[ADD]] clamp{{$}}
-define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(<2 x half> addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %add = fadd half %a, 1.0
   %bc = bitcast half %add to i16
   %zext = zext i16 %bc to i32
   %v2f16 = bitcast i32 %zext to <2 x half>
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %v2f16, <2 x half> zeroinitializer)
   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %clamp, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %clamp, ptr addrspace(1) %out.gep
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index b2770092456df..0585de22320be 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -61,17 +61,17 @@ define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neg_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -128,18 +128,18 @@ define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrs
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %fneg.a = fneg float %a
   %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negabs_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -196,20 +196,20 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %fabs.a = call float @llvm.fabs.f32(float %a)
   %fneg.fabs.a = fneg float %fabs.a
 
   %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negzero_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -274,20 +274,20 @@ define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float a
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd nnan float %a, 0.5
   %max = call float @llvm.maxnum.f32(float %add, float -0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; FIXME: Weird inconsistency in how -0.0 is treated. Accepted if clamp
 ; matched through med3, not if directly. Is this correct?
-define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negzero_maybe_snan_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -352,17 +352,17 @@ define amdgpu_kernel void @v_clamp_negzero_maybe_snan_f32(float addrspace(1)* %o
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %max = call float @llvm.maxnum.f32(float %a, float -0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_multi_use_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_multi_use_max_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -437,18 +437,18 @@ define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, f
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
-  store volatile float %max, float addrspace(1)* undef
+  store float %med, ptr addrspace(1) %out.gep
+  store volatile float %max, ptr addrspace(1) undef
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -506,17 +506,17 @@ define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %max = call half @llvm.maxnum.f16(half %a, half 0.0)
   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 
-  store half %med, half addrspace(1)* %out.gep
+  store half %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neg_f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -574,18 +574,18 @@ define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspa
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %fneg.a = fsub half -0.0, %a
   %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 
-  store half %med, half addrspace(1)* %out.gep
+  store half %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negabs_f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -643,20 +643,20 @@ define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addr
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %a = load half, half addrspace(1)* %gep0
+  %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %a = load half, ptr addrspace(1) %gep0
   %fabs.a = call half @llvm.fabs.f16(half %a)
   %fneg.fabs.a = fsub half -0.0, %fabs.a
 
   %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 
-  store half %med, half addrspace(1)* %out.gep
+  store half %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_f64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -713,17 +713,17 @@ define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspa
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %a = load double, double addrspace(1)* %gep0
+  %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %a = load double, ptr addrspace(1) %gep0
   %max = call double @llvm.maxnum.f64(double %a, double 0.0)
   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 
-  store double %med, double addrspace(1)* %out.gep
+  store double %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neg_f64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -780,18 +780,18 @@ define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double add
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %a = load double, double addrspace(1)* %gep0
+  %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %a = load double, ptr addrspace(1) %gep0
   %fneg.a = fsub double -0.0, %a
   %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 
-  store double %med, double addrspace(1)* %out.gep
+  store double %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negabs_f64:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -848,20 +848,20 @@ define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %a = load double, double addrspace(1)* %gep0
+  %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %a = load double, ptr addrspace(1) %gep0
   %fabs.a = call double @llvm.fabs.f64(double %a)
   %fneg.fabs.a = fsub double -0.0, %fabs.a
 
   %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 
-  store double %med, double addrspace(1)* %out.gep
+  store double %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_aby_negzero_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -921,15 +921,15 @@ define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_aby_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_aby_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -986,15 +986,15 @@ define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_bay_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_bay_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1051,15 +1051,15 @@ define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_yab_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_yab_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1116,15 +1116,15 @@ define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_yba_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_yba_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1181,15 +1181,15 @@ define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_ayb_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_ayb_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1246,15 +1246,15 @@ define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_med3_bya_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_med3_bya_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1311,15 +1311,15 @@ define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constants_to_one_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constants_to_one_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1362,13 +1362,13 @@ define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constants_to_zero_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constants_to_zero_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1410,13 +1410,13 @@ define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %ou
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_preserve_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constant_preserve_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1459,13 +1459,13 @@ define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %ou
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constant_preserve_denorm_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1508,13 +1508,13 @@ define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_qnan_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constant_qnan_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1556,13 +1556,13 @@ define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @v_clamp_constant_snan_f32(ptr addrspace(1) %out) #0 {
 ; GFX6-LABEL: v_clamp_constant_snan_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1604,9 +1604,9 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1614,7 +1614,7 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #
 ; Test non-default behaviors enabling snans and disabling dx10_clamp
 ; ---------------------------------------------------------------------
 
-define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1676,18 +1676,18 @@ define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, f
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %a.nnan = fadd nnan float %a, 0.5
   %max = call float @llvm.maxnum.f32(float %a.nnan, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
+define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #3 {
 ; GFX6-LABEL: v_clamp_f32_snan_dx10clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1744,18 +1744,18 @@ define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out,
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add = fadd float %a, 0.5
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
+define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
 ; GFX6-LABEL: v_clamp_f32_snan_no_dx10clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1817,17 +1817,17 @@ define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %ou
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
+define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #4 {
 ; GFX6-LABEL: v_clamp_f32_snan_no_dx10clamp_nnan_src:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1889,18 +1889,18 @@ define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspac
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %add  = fadd nnan float %a, 1.0
   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_aby_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1957,15 +1957,15 @@ define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_bay_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2022,15 +2022,15 @@ define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_yab_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2087,15 +2087,15 @@ define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_yba_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2152,15 +2152,15 @@ define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_ayb_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2217,15 +2217,15 @@ define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 {
 ; GFX6-LABEL: v_clamp_med3_bya_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2282,15 +2282,15 @@ define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep0
+  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep0
   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
+define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(ptr addrspace(1) %out) #2 {
 ; GFX6-LABEL: v_clamp_constant_qnan_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2333,13 +2333,13 @@ define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspa
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
+define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(ptr addrspace(1) %out) #2 {
 ; GFX6-LABEL: v_clamp_constant_snan_f32_no_dx10_clamp:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -2382,13 +2382,13 @@ define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspa
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
-  store float %med, float addrspace(1)* %out.gep
+  store float %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2453,17 +2453,17 @@ define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x hal
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_elt(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_undef_elt:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2539,17 +2539,17 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_not_zero(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_not_zero:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2624,17 +2624,17 @@ define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out,
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_not_one(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_not_one:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2708,17 +2708,17 @@ define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out,
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neg_v2f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2784,18 +2784,18 @@ define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_negabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_negabs_v2f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2864,20 +2864,20 @@ define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
   %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a
 
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neglo_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neglo_v2f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -2943,20 +2943,20 @@ define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %lo = extractelement <2 x half> %a, i32 0
   %neg.lo = fsub half -0.0, %lo
   %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_neghi_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_neghi_v2f16:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3021,20 +3021,20 @@ define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %hi = extractelement <2 x half> %a, i32 1
   %neg.hi = fsub half -0.0, %hi
   %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_shuffle:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3099,18 +3099,18 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out,
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_undef_limit_elts0:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3185,17 +3185,17 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts0(<2 x half> addrspace(
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half undef>)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half undef, half 1.0>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
+define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0 {
 ; GFX6-LABEL: v_clamp_v2f16_undef_limit_elts1:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3271,17 +3271,17 @@ define amdgpu_kernel void @v_clamp_v2f16_undef_limit_elts1(<2 x half> addrspace(
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
-  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
+  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
+  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x half>, ptr addrspace(1) %gep0
   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
 
-  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
+  store <2 x half> %med, ptr addrspace(1) %out.gep
   ret void
 }
 
-define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
+define amdgpu_kernel void @v_clamp_diff_source_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #0
 ; GFX6-LABEL: v_clamp_diff_source_f32:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -3351,19 +3351,18 @@ define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, flo
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 {
-  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
-  %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
-  %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
-  %l0 = load float, float addrspace(1)* %gep0
-  %l1 = load float, float addrspace(1)* %gep1
-  %l2 = load float, float addrspace(1)* %gep2
+  %gep1 = getelementptr float, ptr addrspace(1) %aptr, i32 1
+  %gep2 = getelementptr float, ptr addrspace(1) %aptr, i32 2
+  %l0 = load float, ptr addrspace(1) %aptr
+  %l1 = load float, ptr addrspace(1) %gep1
+  %l2 = load float, ptr addrspace(1) %gep2
   %a = fadd nsz float %l0, %l1
   %b = fadd nsz float %l0, %l2
   %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
   %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
   %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
-  store float %min, float addrspace(1)* %out.gep
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 3
+  store float %min, ptr addrspace(1) %out.gep
   ret void
 }
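
The hunks above and below all apply the same mechanical rewrite. A minimal sketch of the before/after shape, using an illustrative kernel name and constant rather than anything taken from a specific test:

  ; typed pointers: the pointee type is spelled inside the pointer type
  define amdgpu_kernel void @example(float addrspace(1)* %out) {
    %gep = getelementptr float, float addrspace(1)* %out, i32 1
    store float 1.0, float addrspace(1)* %gep
    ret void
  }

  ; opaque pointers: ptr addrspace(1) carries no pointee type, so only the
  ; pointer spelling changes; getelementptr, load, and store already name
  ; the value type as an explicit operand
  define amdgpu_kernel void @example(ptr addrspace(1) %out) {
    %gep = getelementptr float, ptr addrspace(1) %out, i32 1
    store float 1.0, ptr addrspace(1) %gep
    ret void
  }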
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index 1568c52492248..4d86329483265 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -7,7 +7,7 @@
 ; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFFFFFF))
 ; unless isFabsFree returns true
 
-define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
+define amdgpu_kernel void @s_fabs_free_f16(ptr addrspace(1) %out, i16 %in) {
 ; CI-LABEL: s_fabs_free_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -44,11 +44,11 @@ define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
 ; GFX9-NEXT:    s_endpgm
   %bc= bitcast i16 %in to half
   %fabs = call half @llvm.fabs.f16(half %bc)
-  store half %fabs, half addrspace(1)* %out
+  store half %fabs, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
+define amdgpu_kernel void @s_fabs_f16(ptr addrspace(1) %out, half %in) {
 ; CI-LABEL: s_fabs_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -84,11 +84,11 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
 ; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %fabs = call half @llvm.fabs.f16(half %in)
-  store half %fabs, half addrspace(1)* %out
+  store half %fabs, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fabs_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
 ; CI-LABEL: s_fabs_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -124,11 +124,11 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
-  store <2 x half> %fabs, <2 x half> addrspace(1)* %out
+  store <2 x half> %fabs, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @s_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
+define amdgpu_kernel void @s_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
 ; CI-LABEL: s_fabs_v4f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -167,11 +167,11 @@ define amdgpu_kernel void @s_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
-  store <4 x half> %fabs, <4 x half> addrspace(1)* %out
+  store <4 x half> %fabs, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @fabs_fold_f16(half addrspace(1)* %out, half %in0, half %in1) {
+define amdgpu_kernel void @fabs_fold_f16(ptr addrspace(1) %out, half %in0, half %in1) {
 ; CI-LABEL: fabs_fold_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x2
@@ -214,11 +214,11 @@ define amdgpu_kernel void @fabs_fold_f16(half addrspace(1)* %out, half %in0, hal
 ; GFX9-NEXT:    s_endpgm
   %fabs = call half @llvm.fabs.f16(half %in0)
   %fmul = fmul half %fabs, %in1
-  store half %fmul, half addrspace(1)* %out
+  store half %fmul, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: v_fabs_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
@@ -258,15 +258,15 @@ define amdgpu_kernel void @v_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
-  store <2 x half> %fabs, <2 x half> addrspace(1)* %gep.out
+  store <2 x half> %fabs, ptr addrspace(1) %gep.out
   ret void
 }
 
-define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @fabs_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
 ; CI-LABEL: fabs_free_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -303,13 +303,13 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; GFX9-NEXT:    s_endpgm
   %bc = bitcast i32 %in to <2 x half>
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %bc)
-  store <2 x half> %fabs, <2 x half> addrspace(1)* %out
+  store <2 x half> %fabs, ptr addrspace(1) %out
   ret void
 }
 
 ; FIXME: Should do fabs after conversion to avoid converting multiple
 ; times in this particular case.
-define amdgpu_kernel void @v_fabs_fold_self_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_fabs_fold_self_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: v_fabs_fold_self_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -367,15 +367,15 @@ define amdgpu_kernel void @v_fabs_fold_self_v2f16(<2 x half> addrspace(1)* %out,
 ; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %fmul = fmul <2 x half> %fabs, %val
-  store <2 x half> %fmul, <2 x half> addrspace(1)* %out
+  store <2 x half> %fmul, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_fabs_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 %other.val) #0 {
+define amdgpu_kernel void @v_fabs_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %other.val) #0 {
 ; CI-LABEL: v_fabs_fold_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -439,16 +439,16 @@ define amdgpu_kernel void @v_fabs_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x
 ; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %other.val.cvt = bitcast i32 %other.val to <2 x half>
   %fmul = fmul <2 x half> %fabs, %other.val.cvt
-  store <2 x half> %fmul, <2 x half> addrspace(1)* %out
+  store <2 x half> %fmul, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_extract_fabs_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_extract_fabs_fold_v2f16(ptr addrspace(1) %in) #0 {
 ; CI-LABEL: v_extract_fabs_fold_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -507,20 +507,20 @@ define amdgpu_kernel void @v_extract_fabs_fold_v2f16(<2 x half> addrspace(1)* %i
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in
+  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep.in
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %elt0 = extractelement <2 x half> %fabs, i32 0
   %elt1 = extractelement <2 x half> %fabs, i32 1
 
   %fmul0 = fmul half %elt0, 4.0
   %fadd1 = fadd half %elt1, 2.0
-  store volatile half %fmul0, half addrspace(1)* undef
-  store volatile half %fadd1, half addrspace(1)* undef
+  store volatile half %fmul0, ptr addrspace(1) undef
+  store volatile half %fadd1, ptr addrspace(1) undef
   ret void
 }
 
-define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0 {
 ; CI-LABEL: v_extract_fabs_no_fold_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -571,13 +571,13 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(<2 x half> addrspace(1)*
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in
+  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep.in
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %elt0 = extractelement <2 x half> %fabs, i32 0
   %elt1 = extractelement <2 x half> %fabs, i32 1
-  store volatile half %elt0, half addrspace(1)* undef
-  store volatile half %elt1, half addrspace(1)* undef
+  store volatile half %elt0, ptr addrspace(1) undef
+  store volatile half %elt1, ptr addrspace(1) undef
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
index 1629d55dcda96..32d5fa6e72d79 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
@@ -10,22 +10,22 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
 ; FUNC-LABEL: {{^}}v_fabs_f64:
 ; SI: v_and_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+define amdgpu_kernel void @v_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %tidext = sext i32 %tid to i64
-  %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
-  %val = load double, double addrspace(1)* %gep, align 8
+  %gep = getelementptr double, ptr addrspace(1) %in, i64 %tidext
+  %val = load double, ptr addrspace(1) %gep, align 8
   %fabs = call double @llvm.fabs.f64(double %val)
-  store double %fabs, double addrspace(1)* %out
+  store double %fabs, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fabs_f64:
 ; SI: s_bitset0_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @fabs_f64(ptr addrspace(1) %out, double %in) {
   %fabs = call double @llvm.fabs.f64(double %in)
-  store double %fabs, double addrspace(1)* %out
+  store double %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -33,9 +33,9 @@ define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double %in) {
 ; SI: s_bitset0_b32
 ; SI: s_bitset0_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
   %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
-  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
+  store <2 x double> %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -45,9 +45,9 @@ define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; SI: s_bitset0_b32
 ; SI: s_bitset0_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
   %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
-  store <4 x double> %fabs, <4 x double> addrspace(1)* %out
+  store <4 x double> %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -56,10 +56,10 @@ define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x doub
 ; SI-NOT: and
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, [8 x i32], double %in0, [8 x i32], double %in1) {
+define amdgpu_kernel void @fabs_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) {
   %fabs = call double @llvm.fabs.f64(double %in0)
   %fmul = fmul double %fabs, %in1
-  store double %fmul, double addrspace(1)* %out
+  store double %fmul, ptr addrspace(1) %out
   ret void
 }
 
@@ -68,29 +68,29 @@ define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, [8 x i32], d
 ; SI-NOT: and
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_fn_fold_f64(double addrspace(1)* %out, [8 x i32], double %in0, [8 x i32], double %in1) {
+define amdgpu_kernel void @fabs_fn_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) {
   %fabs = call double @fabs(double %in0)
   %fmul = fmul double %fabs, %in1
-  store double %fmul, double addrspace(1)* %out
+  store double %fmul, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fabs_free_f64:
 ; SI: s_bitset0_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @fabs_free_f64(ptr addrspace(1) %out, i64 %in) {
   %bc= bitcast i64 %in to double
   %fabs = call double @llvm.fabs.f64(double %bc)
-  store double %fabs, double addrspace(1)* %out
+  store double %fabs, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fabs_fn_free_f64:
 ; SI: s_bitset0_b32
 ; SI: s_endpgm
-define amdgpu_kernel void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) {
   %bc= bitcast i64 %in to double
   %fabs = call double @fabs(double %bc)
-  store double %fabs, double addrspace(1)* %out
+  store double %fabs, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index 12b1792258f15..e18c76f89b6c7 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -12,10 +12,10 @@
 ; R600: |PV.{{[XYZW]}}|
 
 ; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_fabsf_fn_free(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_fabsf_fn_free(ptr addrspace(1) %out, i32 %in) {
   %bc= bitcast i32 %in to float
   %fabs = call float @fabsf(float %bc)
-  store float %fabs, float addrspace(1)* %out
+  store float %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -24,10 +24,10 @@ define amdgpu_kernel void @s_fabsf_fn_free(float addrspace(1)* %out, i32 %in) {
 ; R600: |PV.{{[XYZW]}}|
 
 ; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_fabsf_free(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @s_fabsf_free(ptr addrspace(1) %out, i32 %in) {
   %bc= bitcast i32 %in to float
   %fabs = call float @llvm.fabs.f32(float %bc)
-  store float %fabs, float addrspace(1)* %out
+  store float %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -35,9 +35,9 @@ define amdgpu_kernel void @s_fabsf_free(float addrspace(1)* %out, i32 %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
 ; GCN: s_bitset0_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @s_fabsf_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @s_fabsf_f32(ptr addrspace(1) %out, float %in) {
   %fabs = call float @llvm.fabs.f32(float %in)
-  store float %fabs, float addrspace(1)* %out
+  store float %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -47,9 +47,9 @@ define amdgpu_kernel void @s_fabsf_f32(float addrspace(1)* %out, float %in) {
 
 ; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
 ; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
-define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @fabs_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
-  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
+  store <2 x float> %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -63,9 +63,9 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; GCN: s_bitset0_b32
 ; GCN: s_bitset0_b32
 ; GCN: s_bitset0_b32
-define amdgpu_kernel void @fabsf_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
-  store <4 x float> %fabs, <4 x float> addrspace(1)* %out
+  store <4 x float> %fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -75,10 +75,10 @@ define amdgpu_kernel void @fabsf_v4f32(<4 x float> addrspace(1)* %out, <4 x floa
 ; GCN-NOT: and
 ; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[#LOAD + 3]]
 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[#LOAD + 2]]|, [[V_MUL_VI]]
-define amdgpu_kernel void @fabsf_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
+define amdgpu_kernel void @fabsf_fn_fold(ptr addrspace(1) %out, float %in0, float %in1) {
   %fabs = call float @fabsf(float %in0)
   %fmul = fmul float %fabs, %in1
-  store float %fmul, float addrspace(1)* %out
+  store float %fmul, ptr addrspace(1) %out
   ret void
 }
 
@@ -88,22 +88,22 @@ define amdgpu_kernel void @fabsf_fn_fold(float addrspace(1)* %out, float %in0, f
 ; GCN-NOT: and
 ; GCN: v_mov_b32_e32 [[V_MUL_VI:v[0-9]+]], s[[#LOAD + 3]]
 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, |s[[#LOAD + 2]]|, [[V_MUL_VI]]
-define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
+define amdgpu_kernel void @fabs_fold(ptr addrspace(1) %out, float %in0, float %in1) {
   %fabs = call float @llvm.fabs.f32(float %in0)
   %fmul = fmul float %fabs, %in1
-  store float %fmul, float addrspace(1)* %out
+  store float %fmul, ptr addrspace(1) %out
   ret void
 }
 
 ; Make sure we turn some integer operations back into fabsf
 ; FUNC-LABEL: {{^}}bitpreserve_fabsf_f32:
 ; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0
-define amdgpu_kernel void @bitpreserve_fabsf_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @bitpreserve_fabsf_f32(ptr addrspace(1) %out, float %in) {
   %in.bc = bitcast float %in to i32
   %int.abs = and i32 %in.bc, 2147483647
   %bc = bitcast i32 %int.abs to float
   %fadd = fadd float %bc, 1.0
-  store float %fadd, float addrspace(1)* %out
+  store float %fadd, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
index 7c45f4a8a3f02..c93575cfb2021 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@@ -71,15 +71,15 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fadd fast float %fma, %z
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -145,15 +145,15 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fsub fast float %fma, %z
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -227,16 +227,16 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
-  store volatile float %mul.u.v, float addrspace(1)* undef
+  store volatile float %mul.u.v, ptr addrspace(1) undef
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fadd fast float %fma, %z
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -310,16 +310,16 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_mul_commute() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
-  store volatile float %mul.u.v, float addrspace(1)* undef
+  store volatile float %mul.u.v, ptr addrspace(1) undef
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fadd fast float %z, %fma
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -393,16 +393,16 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
-  store volatile float %fma, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
   %add = fadd fast float %fma, %z
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -476,16 +476,16 @@ define amdgpu_kernel void @fast_add_fmuladd_fmul_multi_use_fmuladd_commute() #0
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
-  store volatile float %fma, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
   %add = fadd fast float %z, %fma
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -559,16 +559,16 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_mul() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %sub = fsub fast float %fma, %z
-  store volatile float %mul.u.v, float addrspace(1)* undef
-  store volatile float %sub, float addrspace(1)* undef
+  store volatile float %mul.u.v, ptr addrspace(1) undef
+  store volatile float %sub, ptr addrspace(1) undef
   ret void
 }
 
@@ -642,16 +642,16 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_lhs() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fsub fast float %fma, %z
-  store volatile float %fma, float addrspace(1)* undef
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -725,16 +725,16 @@ define amdgpu_kernel void @fast_sub_fmuladd_fmul_multi_use_fmuladd_rhs() #0 {
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile float, float addrspace(1)* undef
-  %v = load volatile float, float addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile float, ptr addrspace(1) undef
+  %v = load volatile float, ptr addrspace(1) undef
   %mul.u.v = fmul fast float %u, %v
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fsub fast float %z, %fma
-  store volatile float %fma, float addrspace(1)* undef
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -814,17 +814,17 @@ define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_lhs() #
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile half, half addrspace(1)* undef
-  %v = load volatile half, half addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile half, ptr addrspace(1) undef
+  %v = load volatile half, ptr addrspace(1) undef
   %mul.u.v.half = fmul fast half %u, %v
   %mul.u.v = fpext half %mul.u.v.half to float
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fsub fast float %fma, %z
-  store volatile float %fma, float addrspace(1)* undef
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 
@@ -904,17 +904,17 @@ define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() #
 ; GCN-SLOWFMA-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; GCN-SLOWFMA-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-SLOWFMA-NEXT:    s_endpgm
-  %x = load volatile float, float addrspace(1)* undef
-  %y = load volatile float, float addrspace(1)* undef
-  %z = load volatile float, float addrspace(1)* undef
-  %u = load volatile half, half addrspace(1)* undef
-  %v = load volatile half, half addrspace(1)* undef
+  %x = load volatile float, ptr addrspace(1) undef
+  %y = load volatile float, ptr addrspace(1) undef
+  %z = load volatile float, ptr addrspace(1) undef
+  %u = load volatile half, ptr addrspace(1) undef
+  %v = load volatile half, ptr addrspace(1) undef
   %mul.u.v.half = fmul fast half %u, %v
   %mul.u.v = fpext half %mul.u.v.half to float
   %fma = call fast float @llvm.fmuladd.f32(float %x, float %y, float %mul.u.v)
   %add = fsub fast float %z, %fma
-  store volatile float %fma, float addrspace(1)* undef
-  store volatile float %add, float addrspace(1)* undef
+  store volatile float %fma, ptr addrspace(1) undef
+  store volatile float %add, ptr addrspace(1) undef
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
index 34d7fda0167d1..2cd0e0d1fe93b 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -12,14 +12,14 @@
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
-  %b.val = load volatile half, half addrspace(1)* %b
+  %a.val = load volatile half, ptr addrspace(1) %a
+  %b.val = load volatile half, ptr addrspace(1) %b
   %r.val = fadd half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -32,12 +32,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_f16_imm_a(
-    half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load half, half addrspace(1)* %b
+  %b.val = load half, ptr addrspace(1) %b
   %r.val = fadd half 1.0, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -50,12 +50,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_f16_imm_b(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = fadd half %a.val, 2.0
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -86,17 +86,17 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.a = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %a, i32 %tid
-  %gep.b = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %b, i32 %tid
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %gep.a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid
+  %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
+  %a.val = load <2 x half>, ptr addrspace(1) %gep.a
+  %b.val = load <2 x half>, ptr addrspace(1) %gep.b
   %r.val = fadd <2 x half> %a.val, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -120,14 +120,14 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_v2f16_imm_a(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.b = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %b, i32 %tid
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
+  %b.val = load <2 x half>, ptr addrspace(1) %gep.b
   %r.val = fadd <2 x half> <half 1.0, half 2.0>, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -151,14 +151,14 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fadd_v2f16_imm_b(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.a = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %a, i32 %tid
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %gep.a
+  %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid
+  %a.val = load <2 x half>, ptr addrspace(1) %gep.a
   %r.val = fadd <2 x half> %a.val, <half 2.0, half 1.0>
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fadd.ll b/llvm/test/CodeGen/AMDGPU/fadd.ll
index c08fd3572178f..ccf35e2ef6b98 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.ll
@@ -5,9 +5,9 @@
 ; FUNC-LABEL: {{^}}fadd_f32:
 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
 ; SI: v_add_f32
-define amdgpu_kernel void @fadd_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fadd_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
    %add = fadd float %a, %b
-   store float %add, float addrspace(1)* %out, align 4
+   store float %add, ptr addrspace(1) %out, align 4
    ret void
 }
 
@@ -16,9 +16,9 @@ define amdgpu_kernel void @fadd_f32(float addrspace(1)* %out, float %a, float %b
 ; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; SI: v_add_f32
 ; SI: v_add_f32
-define amdgpu_kernel void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %add = fadd <2 x float> %a, %b
-  store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %add, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -31,12 +31,12 @@ define amdgpu_kernel void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; SI: v_add_f32
 ; SI: v_add_f32
 ; SI: v_add_f32
-define amdgpu_kernel void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
+define amdgpu_kernel void @fadd_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in, align 16
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr, align 16
   %result = fadd <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %result, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -57,17 +57,17 @@ define amdgpu_kernel void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; SI: v_add_f32
 ; SI: v_add_f32
 ; SI: v_add_f32
-define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @fadd_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %add = fadd <8 x float> %a, %b
-  store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %add, ptr addrspace(1) %out, align 32
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
 ; SI-NOT: v_add_f32
-define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
+define amdgpu_kernel void @fadd_0_nsz_attr_f32(ptr addrspace(1) %out, float %a) #1 {
    %add = fadd nsz float %a, 0.0
-   store float %add, float addrspace(1)* %out, align 4
+   store float %add, ptr addrspace(1) %out, align 4
    ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fadd64.ll b/llvm/test/CodeGen/AMDGPU/fadd64.ll
index 100cab7badbc5..12c560e523611 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd64.ll
@@ -3,23 +3,23 @@
 
 ; CHECK-LABEL: {{^}}v_fadd_f64:
 ; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                        double addrspace(1)* %in2) {
+define amdgpu_kernel void @v_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                        ptr addrspace(1) %in2) {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep1 = getelementptr inbounds double, double addrspace(1)* %in1, i32 %tid
-  %gep2 = getelementptr inbounds double, double addrspace(1)* %in2, i32 %tid
-  %r0 = load double, double addrspace(1)* %gep1
-  %r1 = load double, double addrspace(1)* %gep2
+  %gep1 = getelementptr inbounds double, ptr addrspace(1) %in1, i32 %tid
+  %gep2 = getelementptr inbounds double, ptr addrspace(1) %in2, i32 %tid
+  %r0 = load double, ptr addrspace(1) %gep1
+  %r1 = load double, ptr addrspace(1) %gep2
   %r2 = fadd double %r0, %r1
-  store double %r2, double addrspace(1)* %out
+  store double %r2, ptr addrspace(1) %out
   ret void
 }
 
 ; CHECK-LABEL: {{^}}s_fadd_f64:
 ; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) {
+define amdgpu_kernel void @s_fadd_f64(ptr addrspace(1) %out, double %r0, double %r1) {
   %r2 = fadd double %r0, %r1
-  store double %r2, double addrspace(1)* %out
+  store double %r2, ptr addrspace(1) %out
   ret void
 }
 
@@ -27,12 +27,12 @@ define amdgpu_kernel void @s_fadd_f64(double addrspace(1)* %out, double %r0, dou
 ; CHECK: v_add_f64
 ; CHECK: v_add_f64
 ; CHECK: _store_dwordx4
-define amdgpu_kernel void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
-                          <2 x double> addrspace(1)* %in2) {
-  %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
-  %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+define amdgpu_kernel void @v_fadd_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                          ptr addrspace(1) %in2) {
+  %r0 = load <2 x double>, ptr addrspace(1) %in1
+  %r1 = load <2 x double>, ptr addrspace(1) %in2
   %r2 = fadd <2 x double> %r0, %r1
-  store <2 x double> %r2, <2 x double> addrspace(1)* %out
+  store <2 x double> %r2, ptr addrspace(1) %out
   ret void
 }
 
@@ -40,9 +40,9 @@ define amdgpu_kernel void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x do
 ; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
 ; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
 ; CHECK: _store_dwordx4
-define amdgpu_kernel void @s_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) {
+define amdgpu_kernel void @s_fadd_v2f64(ptr addrspace(1) %out, <2 x double> %r0, <2 x double> %r1) {
   %r2 = fadd <2 x double> %r0, %r1
-  store <2 x double> %r2, <2 x double> addrspace(1)* %out
+  store <2 x double> %r2, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index b1cd8f32ab71c..eb1914d04bc15 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -11,7 +11,7 @@ declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
 declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
-define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_undef_value_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -40,11 +40,11 @@ define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(half addrspace
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half undef)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -81,13 +81,13 @@ define amdgpu_kernel void @v_test_canonicalize_var_f16(half addrspace(1)* %out)
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %canonicalized = call half @llvm.canonicalize.f16(half %val)
-  store half %canonicalized, half addrspace(1)* undef
+  store half %canonicalized, ptr addrspace(1) undef
   ret void
 }
 
-define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f16(ptr addrspace(1) %out, i16 zeroext %val.arg) #1 {
 ; VI-LABEL: s_test_canonicalize_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
@@ -123,7 +123,7 @@ define amdgpu_kernel void @s_test_canonicalize_var_f16(half addrspace(1)* %out,
 ; CI-NEXT:    s_endpgm
   %val = bitcast i16 %val.arg to half
   %canonicalized = call half @llvm.canonicalize.f16(half %val)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -158,7 +158,7 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
   ret <2 x half> %canonicalized
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fabs_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -195,14 +195,14 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %val.fabs = call half @llvm.fabs.f16(half %val)
   %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fneg_fabs_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -239,15 +239,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %val.fabs = call half @llvm.fabs.f16(half %val)
   %val.fabs.fneg = fneg half %val.fabs
   %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fneg_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -284,14 +284,14 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %val.fneg = fneg half %val
   %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(ptr addrspace(1) %out) #2 {
 ; VI-LABEL: v_test_no_denormals_canonicalize_fneg_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -328,14 +328,14 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half ad
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %val.fneg = fneg half %val
   %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
+define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(ptr addrspace(1) %out) #2 {
 ; VI-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -372,15 +372,15 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(ha
 ; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %out
   %val.fabs = call half @llvm.fabs.f16(half %val)
   %val.fabs.fneg = fneg half %val.fabs
   %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_p0_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -409,11 +409,11 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_n0_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -443,11 +443,11 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_p1_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -477,11 +477,11 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_n1_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -511,11 +511,11 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_literal_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -545,11 +545,11 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f16(half addrspace(1)*
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_default_denormals_fold_canonicalize_denormal0_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -579,11 +579,11 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_f1
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(ptr addrspace(1) %out) #3 {
 ; VI-LABEL: test_denormals_fold_canonicalize_denormal0_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -613,11 +613,11 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f16(half a
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_default_denormals_fold_canonicalize_denormal1_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -647,11 +647,11 @@ define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal1_f1
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(ptr addrspace(1) %out) #3 {
 ; VI-LABEL: test_denormals_fold_canonicalize_denormal1_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -681,11 +681,11 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f16(half a
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -715,11 +715,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %o
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_value_neg1_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -749,11 +749,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f16(half addrs
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_value_neg2_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -783,11 +783,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f16(half addrs
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan0_value_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -817,11 +817,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f16(half addrspace
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan1_value_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -851,11 +851,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f16(half addrspace
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan2_value_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -885,11 +885,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f16(half addrspace
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan3_value_f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -919,11 +919,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace
 ; CI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
-  store half %canonicalized, half addrspace(1)* %out
+  store half %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_var_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -979,14 +979,14 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fabs_var_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1043,15 +1043,15 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fneg_fabs_var_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1109,16 +1109,16 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
   %val.fabs.fneg = fneg <2 x half> %val.fabs
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: v_test_canonicalize_fneg_var_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1175,15 +1175,15 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep
+  %gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep
   %fneg.val = fneg <2 x half> %val
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)* %out, i32 zeroext %val.arg) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_v2f16(ptr addrspace(1) %out, i32 zeroext %val.arg) #1 {
 ; VI-LABEL: s_test_canonicalize_var_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
@@ -1229,11 +1229,11 @@ define amdgpu_kernel void @s_test_canonicalize_var_v2f16(<2 x half> addrspace(1)
 ; CI-NEXT:    s_endpgm
   %val = bitcast i32 %val.arg to <2 x half>
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_p0_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1262,11 +1262,11 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_v2f16(<2 x half> addrspace(
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> zeroinitializer)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_n0_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1296,11 +1296,11 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_v2f16(<2 x half> addrspace(
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -0.0, half -0.0>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_p1_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1330,11 +1330,11 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_v2f16(<2 x half> addrspace(
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 1.0, half 1.0>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_n1_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1364,11 +1364,11 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_v2f16(<2 x half> addrspace(
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half -1.0, half -1.0>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_literal_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1398,11 +1398,11 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_v2f16(<2 x half> addrs
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 16.0, half 16.0>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_no_denormals_fold_canonicalize_denormal0_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1432,11 +1432,11 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_v2f16(<
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x half> addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(ptr addrspace(1) %out) #3 {
 ; VI-LABEL: test_denormals_fold_canonicalize_denormal0_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1466,11 +1466,11 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_v2f16(<2 x
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH03FF, half 0xH03FF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_no_denormals_fold_canonicalize_denormal1_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1500,11 +1500,11 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_v2f16(<
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x half> addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(ptr addrspace(1) %out) #3 {
 ; VI-LABEL: test_denormals_fold_canonicalize_denormal1_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1534,11 +1534,11 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_v2f16(<2 x
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH83FF, half 0xH83FF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1568,11 +1568,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_v2f16(<2 x half> addrspac
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C00, half 0xH7C00>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_value_neg1_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1602,11 +1602,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_v2f16(<2 x hal
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> bitcast (i32 -1 to <2 x half>))
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_qnan_value_neg2_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1636,11 +1636,11 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_v2f16(<2 x hal
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half bitcast (i16 -2 to half), half bitcast (i16 -2 to half)>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan0_value_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1670,11 +1670,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_v2f16(<2 x half> a
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7C01, half 0xH7C01>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan1_value_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1704,11 +1704,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_v2f16(<2 x half> a
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xH7DFF, half 0xH7DFF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan2_value_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1738,11 +1738,11 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_v2f16(<2 x half> a
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFDFF, half 0xHFDFF>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_snan3_value_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1772,7 +1772,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_v2f16(<2 x half> a
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> <half 0xHFC01, half 0xHFC01>)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -1842,7 +1842,7 @@ define <4 x half> @v_test_canonicalize_var_v4f16(<4 x half> %val) #1 {
   ret <4 x half> %canonicalized
 }
 
-define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(<2 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: s_test_canonicalize_undef_v2f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1871,7 +1871,7 @@ define amdgpu_kernel void @s_test_canonicalize_undef_v2f16(<2 x half> addrspace(
 ; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
+  store <2 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -2080,7 +2080,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 {
   ret <2 x half> %canonicalized
 }
 
-define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(<4 x half> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: s_test_canonicalize_undef_v4f16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -2112,7 +2112,7 @@ define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(<4 x half> addrspace(
 ; CI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
   %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
-  store <4 x half> %canonicalized, <4 x half> addrspace(1)* %out
+  store <4 x half> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
index 1bb05a1cedc6e..e4d4b55210c2d 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -21,10 +21,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
 ; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
 ; GFX9: v_max_f32_e32 [[REG:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 {
-  %val = load float, float addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
+  %val = load float, ptr addrspace(1) %out
   %canonicalized = call float @llvm.canonicalize.f32(float %val)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -32,9 +32,9 @@ define amdgpu_kernel void @v_test_canonicalize_var_f32(float addrspace(1)* %out)
 ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
 ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out, float %val) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, float %val) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float %val)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -42,11 +42,11 @@ define amdgpu_kernel void @s_test_canonicalize_var_f32(float addrspace(1)* %out,
 ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], 1.0, |{{v[0-9]+}}|
 ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], |{{v[0-9]+}}|, |{{v[0-9]+}}|
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* %out) #1 {
-  %val = load float, float addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %out) #1 {
+  %val = load float, ptr addrspace(1) %out
   %val.fabs = call float @llvm.fabs.f32(float %val)
   %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -54,12 +54,12 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)*
 ; GFX678: v_mul_f32_e64 [[REG:v[0-9]+]], -1.0, |{{v[0-9]+}}|
 ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -|{{v[0-9]+}}|, -|{{v[0-9]+}}|
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
-  %val = load float, float addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1) %out) #1 {
+  %val = load float, ptr addrspace(1) %out
   %val.fabs = call float @llvm.fabs.f32(float %val)
   %val.fabs.fneg = fneg float %val.fabs
   %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -67,217 +67,217 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace
 ; GFX678: v_mul_f32_e32 [[REG:v[0-9]+]], -1.0, {{v[0-9]+}}
 ; GFX9: v_max_f32_e64 [[REG:v[0-9]+]], -{{v[0-9]+}}, -{{v[0-9]+}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
-  %val = load float, float addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %out) #1 {
+  %val = load float, ptr addrspace(1) %out
   %val.fneg = fneg float %val
   %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_undef_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_undef_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float undef)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f32:
 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_p1_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_n1_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x41800000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_literal_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fffff{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(float addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f32:
 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x807fffff{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(float addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #3 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f32:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000{{$}}
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[REG]]
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(1) %out) #1 {
   %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
-  store float %canonicalized, float addrspace(1)* %out
+  store float %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_f64:
 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 {
-  %val = load double, double addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1 {
+  %val = load double, ptr addrspace(1) %out
   %canonicalized = call double @llvm.canonicalize.f64(double %val)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_test_canonicalize_var_f64:
 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
-define amdgpu_kernel void @s_test_canonicalize_var_f64(double addrspace(1)* %out, double %val) #1 {
+define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, double %val) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double %val)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_f64:
 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], |{{v\[[0-9]+:[0-9]+\]}}|, |{{v\[[0-9]+:[0-9]+\]}}|
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* %out) #1 {
-  %val = load double, double addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %out) #1 {
+  %val = load double, ptr addrspace(1) %out
   %val.fabs = call double @llvm.fabs.f64(double %val)
   %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_f64:
 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -|{{v\[[0-9]+:[0-9]+\]}}|, -|{{v\[[0-9]+:[0-9]+\]}}|
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
-  %val = load double, double addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1) %out) #1 {
+  %val = load double, ptr addrspace(1) %out
   %val.fabs = call double @llvm.fabs.f64(double %val)
   %val.fabs.fneg = fneg double %val.fabs
   %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_f64:
 ; GCN: v_max_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -{{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[REG]]
-define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
-  %val = load double, double addrspace(1)* %out
+define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %out) #1 {
+  %val = load double, ptr addrspace(1) %out
   %val.fneg = fneg double %val
   %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -285,9 +285,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)*
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -295,9 +295,9 @@ define amdgpu_kernel void @test_fold_canonicalize_p0_f64(double addrspace(1)* %o
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -305,9 +305,9 @@ define amdgpu_kernel void @test_fold_canonicalize_n0_f64(double addrspace(1)* %o
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x3ff00000{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -315,9 +315,9 @@ define amdgpu_kernel void @test_fold_canonicalize_p1_f64(double addrspace(1)* %o
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xbff00000{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -325,9 +325,9 @@ define amdgpu_kernel void @test_fold_canonicalize_n1_f64(double addrspace(1)* %o
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x40300000{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -335,9 +335,9 @@ define amdgpu_kernel void @test_fold_canonicalize_literal_f64(double addrspace(1
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], v[[LO]]{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #2 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #2 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -345,9 +345,9 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(dou
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0xfffff{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #3 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -355,9 +355,9 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(double
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: v_bfrev_b32_e32 v[[HI:[0-9]+]], 1{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #2 {
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #2 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -365,9 +365,9 @@ define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(dou
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x800fffff{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #3 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -375,9 +375,9 @@ define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(double
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -385,9 +385,9 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(double addrspace(1)*
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -395,9 +395,9 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(double add
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -405,9 +405,9 @@ define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(double add
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -415,9 +415,9 @@ define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(double addrspa
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -425,9 +425,9 @@ define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(double addrspa
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
@@ -435,48 +435,48 @@ define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(double addrspa
 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7ff80000{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, v[[[LO]]:[[HI]]]
-define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(1) %out) #1 {
   %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
-  store double %canonicalized, double addrspace(1)* %out
+  store double %canonicalized, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL:  {{^}}test_canonicalize_value_f64_flush:
 ; GFX678: v_mul_f64 v[{{[0-9:]+}}], 1.0, v[{{[0-9:]+}}]
 ; GFX9: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_canonicalize_value_f64_flush(double addrspace(1)* %arg, double addrspace(1)* %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
-  %v = load double, double addrspace(1)* %gep, align 8
+  %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
+  %v = load double, ptr addrspace(1) %gep, align 8
   %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
-  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
-  store double %canonicalized, double addrspace(1)* %gep2, align 8
+  %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
+  store double %canonicalized, ptr addrspace(1) %gep2, align 8
   ret void
 }
 
 ; GCN-LABEL:  {{^}}test_canonicalize_value_f32_flush:
 ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
 ; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_canonicalize_value_f32_flush(float addrspace(1)* %arg, float addrspace(1)* %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
-  %v = load float, float addrspace(1)* %gep, align 4
+  %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
+  %v = load float, ptr addrspace(1) %gep, align 4
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
-  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
-  store float %canonicalized, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
+  store float %canonicalized, ptr addrspace(1) %gep2, align 4
   ret void
 }
 
 ; GCN-LABEL:  {{^}}test_canonicalize_value_f16_flush:
 ; GFX8: v_mul_f16_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
 ; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)* %arg, half addrspace(1)* %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
-  %v = load half, half addrspace(1)* %gep, align 2
+  %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
+  %v = load half, ptr addrspace(1) %gep, align 2
   %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
-  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
-  store half %canonicalized, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
+  store half %canonicalized, ptr addrspace(1) %gep2, align 2
   ret void
 }
 
@@ -486,38 +486,38 @@ define amdgpu_kernel void @test_canonicalize_value_f16_flush(half addrspace(1)*
 ; GFX8-DAG: v_mul_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 
 ; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
-define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #4 {
+define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
-  %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
+  %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
+  %v = load <2 x half>, ptr addrspace(1) %gep, align 4
   %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
-  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
+  store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
   ret void
 }
 
 ; GCN-LABEL:  {{^}}test_canonicalize_value_f64_denorm:
 ; GCN: v_max_f64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-define amdgpu_kernel void @test_canonicalize_value_f64_denorm(double addrspace(1)* %arg, double addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
-  %v = load double, double addrspace(1)* %gep, align 8
+  %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
+  %v = load double, ptr addrspace(1) %gep, align 8
   %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
-  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
-  store double %canonicalized, double addrspace(1)* %gep2, align 8
+  %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
+  store double %canonicalized, ptr addrspace(1) %gep2, align 8
   ret void
 }
 
 ; GCN-LABEL:  {{^}}test_canonicalize_value_f32_denorm:
 ; GFX678: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
 ; GFX9: v_max_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)* %arg, float addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
-  %v = load float, float addrspace(1)* %gep, align 4
+  %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
+  %v = load float, ptr addrspace(1) %gep, align 4
   %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
-  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
-  store float %canonicalized, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
+  store float %canonicalized, ptr addrspace(1) %gep2, align 4
   ret void
 }
 
@@ -526,13 +526,13 @@ define amdgpu_kernel void @test_canonicalize_value_f32_denorm(float addrspace(1)
 ; GFX6: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
 ; GFX8: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 ; GFX9: v_max_f16_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)* %arg, half addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
-  %v = load half, half addrspace(1)* %gep, align 2
+  %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
+  %v = load half, ptr addrspace(1) %gep, align 2
   %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
-  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
-  store half %canonicalized, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
+  store half %canonicalized, ptr addrspace(1) %gep2, align 2
   ret void
 }
 
@@ -544,25 +544,25 @@ define amdgpu_kernel void @test_canonicalize_value_f16_denorm(half addrspace(1)*
 ; GFX8: v_max_f16_e32
 
 ; GFX9: v_pk_max_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(<2 x half> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) #3 {
+define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %arg, i32 %id
-  %v = load <2 x half>, <2 x half> addrspace(1)* %gep, align 4
+  %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
+  %v = load <2 x half>, ptr addrspace(1) %gep, align 4
   %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
-  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
-  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
+  store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f64:
 ; GCN: v_max_f64
 ; GCN: v_max_f64
-define amdgpu_kernel void @v_test_canonicalize_var_v2f64(<2 x double> addrspace(1)* %out) #1 {
+define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr <2 x double>, <2 x double> addrspace(1)* %out, i32 %tid
-  %val = load <2 x double>, <2 x double> addrspace(1)* %gep
+  %gep = getelementptr <2 x double>, ptr addrspace(1) %out, i32 %tid
+  %val = load <2 x double>, ptr addrspace(1) %gep
   %canonicalized = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %val)
-  store <2 x double> %canonicalized, <2 x double> addrspace(1)* %out
+  store <2 x double> %canonicalized, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fconst64.ll b/llvm/test/CodeGen/AMDGPU/fconst64.ll
index ca313d80894a6..42850a5366925 100644
--- a/llvm/test/CodeGen/AMDGPU/fconst64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fconst64.ll
@@ -5,12 +5,12 @@
 ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
 ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
 
-define amdgpu_kernel void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+define amdgpu_kernel void @fconst_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
-   %gep = getelementptr inbounds double, double addrspace(1)* %in, i32 %tid
-   %r1 = load double, double addrspace(1)* %gep
+   %gep = getelementptr inbounds double, ptr addrspace(1) %in, i32 %tid
+   %r1 = load double, ptr addrspace(1) %gep
    %r2 = fadd double %r1, 5.000000e+00
-   store double %r2, double addrspace(1)* %out
+   store double %r2, ptr addrspace(1) %out
    ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
index 0cbed25036b3a..dabeda5d37fd9 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -35,19 +35,19 @@
 ; GFX8PLUS: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
 define amdgpu_kernel void @v_fdiv_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %a.val = load volatile half, half addrspace(1)* %gep.a
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds half, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %a.val = load volatile half, ptr addrspace(1) %gep.a
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -57,15 +57,15 @@ entry:
 ; GFX8PLUS: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rcp_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rcp_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv half 1.0, %b.val, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -75,16 +75,16 @@ entry:
 ; GFX8PLUS: v_rcp_f16_e64 [[RESULT:v[0-9]+]], |[[VAL]]|
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rcp_f16_abs(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_abs(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %b.abs = call half @llvm.fabs.f16(half %b.val)
   %r.val = fdiv half 1.0, %b.abs, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -97,15 +97,15 @@ entry:
 ; GFX8PLUS: v_cvt_f16_f32_e32 [[CVT_BACK16:v[0-9]+]], [[RCP32]]
 ; GFX8PLUS: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK16]], [[VAL16]], 1.0
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @reciprocal_f16_rounded(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @reciprocal_f16_rounded(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv half 1.0, %b.val
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -115,15 +115,15 @@ entry:
 ; GFX8PLUS: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rcp_f16_afn(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv afn half 1.0, %b.val, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -133,15 +133,15 @@ entry:
 ; GFX8PLUS: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[VAL]]
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rcp_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rcp_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv half -1.0, %b.val, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -151,16 +151,16 @@ entry:
 ; GFX8PLUS: v_rsq_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rsq_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rsq_f16(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
   %r.val = fdiv half 1.0, %b.sqrt, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -172,16 +172,16 @@ entry:
 ; GFX8PLUS-NEXT: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[SQRT]]
 ; GFX8PLUS-NOT: [[RESULT]]
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_rsq_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_rsq_f16_neg(ptr addrspace(1) %r, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
   %r.val = fdiv half -1.0, %b.sqrt, !fpmath !0
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -193,17 +193,17 @@ entry:
 ; GFX8PLUS: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[LHS]], [[RCP]]
 
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_fdiv_f16_afn(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
+define amdgpu_kernel void @v_fdiv_f16_afn(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %a.val = load volatile half, half addrspace(1)* %gep.a
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds half, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %a.val = load volatile half, ptr addrspace(1) %gep.a
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv afn half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -215,17 +215,17 @@ entry:
 ; GFX8PLUS: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[LHS]], [[RCP]]
 
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
+define amdgpu_kernel void @v_fdiv_f16_unsafe(ptr addrspace(1) %r, ptr addrspace(1) %a, ptr addrspace(1) %b) #2 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
-  %a.val = load volatile half, half addrspace(1)* %gep.a
-  %b.val = load volatile half, half addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds half, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.r = getelementptr inbounds half, ptr addrspace(1) %r, i64 %tid.ext
+  %a.val = load volatile half, ptr addrspace(1) %gep.a
+  %b.val = load volatile half, ptr addrspace(1) %gep.b
   %r.val = fdiv half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %gep.r
+  store half %r.val, ptr addrspace(1) %gep.r
   ret void
 }
 
@@ -234,10 +234,10 @@ entry:
 
 ; GFX8PLUS: v_mul_f16_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.*}}, [[MUL]]
-define amdgpu_kernel void @div_afn_2_x_pat_f16(half addrspace(1)* %out) #0 {
-  %x = load half, half addrspace(1)* undef
+define amdgpu_kernel void @div_afn_2_x_pat_f16(ptr addrspace(1) %out) #0 {
+  %x = load half, ptr addrspace(1) undef
   %rcp = fdiv afn half %x, 2.0
-  store half %rcp, half addrspace(1)* %out, align 4
+  store half %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -246,10 +246,10 @@ define amdgpu_kernel void @div_afn_2_x_pat_f16(half addrspace(1)* %out) #0 {
 
 ; GFX8PLUS: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.*}}, [[MUL]]
-define amdgpu_kernel void @div_afn_k_x_pat_f16(half addrspace(1)* %out) #0 {
-  %x = load half, half addrspace(1)* undef
+define amdgpu_kernel void @div_afn_k_x_pat_f16(ptr addrspace(1) %out) #0 {
+  %x = load half, ptr addrspace(1) undef
   %rcp = fdiv afn half %x, 10.0
-  store half %rcp, half addrspace(1)* %out, align 4
+  store half %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -258,10 +258,10 @@ define amdgpu_kernel void @div_afn_k_x_pat_f16(half addrspace(1)* %out) #0 {
 
 ; GFX8PLUS: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
 ; GFX8PLUS: {{flat|global}}_store_{{short|b16}} v{{.*}}, [[MUL]]
-define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(half addrspace(1)* %out) #0 {
-  %x = load half, half addrspace(1)* undef
+define amdgpu_kernel void @div_afn_neg_k_x_pat_f16(ptr addrspace(1) %out) #0 {
+  %x = load half, ptr addrspace(1) undef
   %rcp = fdiv afn half %x, -10.0
-  store half %rcp, half addrspace(1)* %out, align 4
+  store half %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
index c48cb9766eb24..1a925788672cf 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -29,12 +29,12 @@
 ; GCN: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]]
 ; GCN: buffer_store_dwordx2 [[RESULT]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
-  %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
-  %num = load volatile double, double addrspace(1)* %in
-  %den = load volatile double, double addrspace(1)* %gep.1
+define amdgpu_kernel void @fdiv_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %gep.1 = getelementptr double, ptr addrspace(1) %in, i32 1
+  %num = load volatile double, ptr addrspace(1) %in
+  %den = load volatile double, ptr addrspace(1) %gep.1
   %result = fdiv double %num, %den
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -68,69 +68,69 @@ define double @v_rcp_f64_afn(double %x) #0 {
 }
 
 ; GCN-LABEL: {{^}}fdiv_f64_s_v:
-define amdgpu_kernel void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) #0 {
-  %den = load double, double addrspace(1)* %in
+define amdgpu_kernel void @fdiv_f64_s_v(ptr addrspace(1) %out, ptr addrspace(1) %in, double %num) #0 {
+  %den = load double, ptr addrspace(1) %in
   %result = fdiv double %num, %den
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fdiv_f64_v_s:
-define amdgpu_kernel void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) #0 {
-  %num = load double, double addrspace(1)* %in
+define amdgpu_kernel void @fdiv_f64_v_s(ptr addrspace(1) %out, ptr addrspace(1) %in, double %den) #0 {
+  %num = load double, ptr addrspace(1) %in
   %result = fdiv double %num, %den
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fdiv_f64_s_s:
-define amdgpu_kernel void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) #0 {
+define amdgpu_kernel void @fdiv_f64_s_s(ptr addrspace(1) %out, double %num, double %den) #0 {
   %result = fdiv double %num, %den
-  store double %result, double addrspace(1)* %out
+  store double %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_fdiv_v2f64:
-define amdgpu_kernel void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
-  %gep.1 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in, i32 1
-  %num = load <2 x double>, <2 x double> addrspace(1)* %in
-  %den = load <2 x double>, <2 x double> addrspace(1)* %gep.1
+define amdgpu_kernel void @v_fdiv_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %gep.1 = getelementptr <2 x double>, ptr addrspace(1) %in, i32 1
+  %num = load <2 x double>, ptr addrspace(1) %in
+  %den = load <2 x double>, ptr addrspace(1) %gep.1
   %result = fdiv <2 x double> %num, %den
-  store <2 x double> %result, <2 x double> addrspace(1)* %out
+  store <2 x double> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_fdiv_v2f64:
-define amdgpu_kernel void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2 x double> %den) {
+define amdgpu_kernel void @s_fdiv_v2f64(ptr addrspace(1) %out, <2 x double> %num, <2 x double> %den) {
   %result = fdiv <2 x double> %num, %den
-  store <2 x double> %result, <2 x double> addrspace(1)* %out
+  store <2 x double> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_fdiv_v4f64:
-define amdgpu_kernel void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
-  %gep.1 = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %num = load <4 x double>, <4 x double> addrspace(1)* %in
-  %den = load <4 x double>, <4 x double> addrspace(1)* %gep.1
+define amdgpu_kernel void @v_fdiv_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %gep.1 = getelementptr <4 x double>, ptr addrspace(1) %in, i32 1
+  %num = load <4 x double>, ptr addrspace(1) %in
+  %den = load <4 x double>, ptr addrspace(1) %gep.1
   %result = fdiv <4 x double> %num, %den
-  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  store <4 x double> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_fdiv_v4f64:
-define amdgpu_kernel void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) #0 {
+define amdgpu_kernel void @s_fdiv_v4f64(ptr addrspace(1) %out, <4 x double> %num, <4 x double> %den) #0 {
   %result = fdiv <4 x double> %num, %den
-  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  store <4 x double> %result, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}div_fast_2_x_pat_f64:
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0.5
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_2_x_pat_f64(double addrspace(1)* %out) #1 {
-  %x = load double, double addrspace(1)* undef
+define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #1 {
+  %x = load double, ptr addrspace(1) undef
   %rcp = fdiv fast double %x, 2.0
-  store double %rcp, double addrspace(1)* %out, align 4
+  store double %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -139,10 +139,10 @@ define amdgpu_kernel void @div_fast_2_x_pat_f64(double addrspace(1)* %out) #1 {
 ; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0x3fb99999
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_k_x_pat_f64(double addrspace(1)* %out) #1 {
-  %x = load double, double addrspace(1)* undef
+define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+  %x = load double, ptr addrspace(1) undef
   %rcp = fdiv fast double %x, 10.0
-  store double %rcp, double addrspace(1)* %out, align 4
+  store double %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -151,10 +151,10 @@ define amdgpu_kernel void @div_fast_k_x_pat_f64(double addrspace(1)* %out) #1 {
 ; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0xbfb99999
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(double addrspace(1)* %out) #1 {
-  %x = load double, double addrspace(1)* undef
+define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+  %x = load double, ptr addrspace(1) undef
   %rcp = fdiv fast double %x, -10.0
-  store double %rcp, double addrspace(1)* %out, align 4
+  store double %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index d7673e8ae7bbc..73c759ee51a24 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -33,10 +33,10 @@
 ; GFX10: s_denorm_mode 12
 ; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
-define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv ninf float %a, %b
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -69,10 +69,10 @@ entry:
 
 ; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
-define amdgpu_kernel void @fdiv_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
+define amdgpu_kernel void @fdiv_f32_denormals(ptr addrspace(1) %out, float %a, float %b) #2 {
 entry:
   %fdiv = fdiv float %a, %b
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -82,10 +82,10 @@ entry:
 ; GCN: v_rcp_f32
 ; GCN: v_mul_f32
 ; GCN: v_mul_f32
-define amdgpu_kernel void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_25ulp_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv float %a, %b, !fpmath !0
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -94,10 +94,10 @@ entry:
 ; GCN: v_fma_f32
 ; GCN: v_div_fmas_f32
 ; GCN: v_div_fixup_f32
-define amdgpu_kernel void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
+define amdgpu_kernel void @fdiv_25ulp_denormals_f32(ptr addrspace(1) %out, float %a, float %b) #2 {
 entry:
   %fdiv = fdiv float %a, %b, !fpmath !0
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -108,10 +108,10 @@ entry:
 ; PREGFX10-NOT: s_setreg
 ; GFX10-NOT: s_denorm_mode
 ; GCN: buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
+define amdgpu_kernel void @fdiv_fast_denormals_f32(ptr addrspace(1) %out, float %a, float %b) #2 {
 entry:
   %fdiv = fdiv fast float %a, %b
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -123,10 +123,10 @@ entry:
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
 ; GCN: buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv fast float %a, %b
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -138,10 +138,10 @@ entry:
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
 ; GCN: buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv fast float %a, %b, !fpmath !0
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -153,10 +153,10 @@ entry:
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
 ; GCN: buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @fdiv_f32_arcp_math(ptr addrspace(1) %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv arcp ninf float %a, %b
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -170,10 +170,10 @@ entry:
 ; GCN: v_div_scale_f32
 ; GCN: v_div_scale_f32
 ; GCN: v_div_scale_f32
-define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
   %fdiv = fdiv <2 x float> %a, %b
-  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+  store <2 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -181,10 +181,10 @@ entry:
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
 ; GCN-NOT: v_cmp_gt_f32
-define amdgpu_kernel void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_ulp25_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
   %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
-  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+  store <2 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -196,10 +196,10 @@ entry:
 
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
-define amdgpu_kernel void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f32_fast_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
   %fdiv = fdiv fast <2 x float> %a, %b
-  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+  store <2 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -211,10 +211,10 @@ entry:
 
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
-define amdgpu_kernel void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @fdiv_v2f32_arcp_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
 entry:
   %fdiv = fdiv arcp ninf <2 x float> %a, %b
-  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+  store <2 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -232,12 +232,12 @@ entry:
 ; GCN: v_div_fixup_f32
 ; GCN: v_div_fixup_f32
 ; GCN: v_div_fixup_f32
-define amdgpu_kernel void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1) * %in
-  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
+define amdgpu_kernel void @fdiv_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr
   %result = fdiv <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -255,12 +255,12 @@ define amdgpu_kernel void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
-define amdgpu_kernel void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1) * %in
-  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
+define amdgpu_kernel void @fdiv_v4f32_fast_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr
   %result = fdiv fast <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -278,12 +278,12 @@ define amdgpu_kernel void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out,
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
 ; GCN: v_rcp_f32
-define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1) * %in
-  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
+define amdgpu_kernel void @fdiv_v4f32_arcp_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr
   %result = fdiv arcp ninf <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -308,10 +308,10 @@ define amdgpu_kernel void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out,
 ; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
 
-define amdgpu_kernel void @fdiv_f32_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fdiv_f32_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #0 {
 entry:
   %fdiv = fdiv float 1.000000e+00, %a
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -343,10 +343,10 @@ entry:
 
 ; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]]
 ; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]],
-define amdgpu_kernel void @fdiv_f32_denorms_correctly_rounded_divide_sqrt(float addrspace(1)* %out, float %a) #2 {
+define amdgpu_kernel void @fdiv_f32_denorms_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #2 {
 entry:
   %fdiv = fdiv float 1.000000e+00, %a
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index 919c62fbb96c4..f6ac0f6dd2a5b 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -14,10 +14,10 @@
 ; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]
 
 ; GCN:            global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_1_by_x_25ulp(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv float 1.000000e+00, %load, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -34,10 +34,10 @@ define amdgpu_kernel void @div_1_by_x_25ulp(float addrspace(1)* %arg) {
 ; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]
 
 ; GCN:            global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv float -1.000000e+00, %load, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -54,11 +54,11 @@ define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
 ; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]
 
 ; GCN:            global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fneg float %load
   %div = fdiv float 1.000000e+00, %neg, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -75,11 +75,11 @@ define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
 ; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]
 
 ; GCN:            global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fsub float -0.000000e+00, %load
   %div = fdiv float -1.000000e+00, %neg, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -113,10 +113,10 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
 ; GCN-FLUSH:      v_rcp_f32_e32
 ; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
 ; GCN-FLUSH:      global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_1_by_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -149,10 +149,10 @@ define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 ; GCN-FLUSH:      v_rcp_f32_e64
 ; GCN-FLUSH:      v_rcp_f32_e64
 ; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
-define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %load, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -186,11 +186,11 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
 ; GCN-FLUSH:      v_rcp_f32_e64
 ; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
 ; GCN-FLUSH:      global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %neg = fneg <4 x float> %load
   %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -224,11 +224,11 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
 ; GCN-FLUSH:      v_rcp_f32_e32
 ; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
 ; GCN-FLUSH:      global_store_dwordx4 v{{[0-9]+}}, v[[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %neg = fneg <4 x float> %load
   %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -268,10 +268,10 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 ; GCN-FLUSH-NOT:  v_div
 
 ; GCN:            global_store_dwordx4
-define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_c_by_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %load, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -311,11 +311,11 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 ; GCN-FLUSH-NOT:  v_div
 
 ; GCN:            global_store_dwordx4
-define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
-  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
+define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(ptr addrspace(1) %arg) {
+  %load = load <4 x float>, ptr addrspace(1) %arg, align 16
   %neg = fneg <4 x float> %load
   %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
-  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
+  store <4 x float> %div, ptr addrspace(1) %arg, align 16
   ret void
 }
 
@@ -337,10 +337,10 @@ define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %
 ; GCN-FLUSH:      v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]
 
 ; GCN:            global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_v_by_x_25ulp(float addrspace(1)* %arg, float %num) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_v_by_x_25ulp(ptr addrspace(1) %arg, float %num) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv float %num, %load, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -348,10 +348,10 @@ define amdgpu_kernel void @div_v_by_x_25ulp(float addrspace(1)* %arg, float %num
 ; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
 ; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]:[0-9]+\]}}
-define amdgpu_kernel void @div_1_by_x_fast(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_x_fast(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv fast float 1.000000e+00, %load, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -359,10 +359,10 @@ define amdgpu_kernel void @div_1_by_x_fast(float addrspace(1)* %arg) {
 ; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
 ; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_x_fast(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv fast float -1.000000e+00, %load, !fpmath !0
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -370,11 +370,11 @@ define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
 ; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
 ; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_minus_x_fast(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fneg float %load, !fpmath !0
   %div = fdiv fast float 1.000000e+00, %neg
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -382,11 +382,11 @@ define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
 ; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
 ; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_minus_x_fast(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fsub float -0.000000e+00, %load, !fpmath !0
   %div = fdiv fast float -1.000000e+00, %neg
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -396,10 +396,10 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg)
 ; GCN-DAG: v_div_scale_f32
 ; GCN:     v_div_fmas_f32
 ; GCN:     v_div_fixup_f32
-define amdgpu_kernel void @div_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_x_correctly_rounded(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv float 1.000000e+00, %load
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -409,10 +409,10 @@ define amdgpu_kernel void @div_1_by_x_correctly_rounded(float addrspace(1)* %arg
 ; GCN-DAG: v_div_scale_f32
 ; GCN:     v_div_fmas_f32
 ; GCN:     v_div_fixup_f32
-define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %div = fdiv float -1.000000e+00, %load
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -422,11 +422,11 @@ define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(float addrspace(1)
 ; GCN-DAG: v_div_scale_f32
 ; GCN:     v_div_fmas_f32
 ; GCN:     v_div_fixup_f32
-define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fsub float -0.000000e+00, %load
   %div = fdiv float 1.000000e+00, %neg
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 
@@ -436,11 +436,11 @@ define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)
 ; GCN-DAG: v_div_scale_f32
 ; GCN:     v_div_fmas_f32
 ; GCN:     v_div_fixup_f32
-define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
-  %load = load float, float addrspace(1)* %arg, align 4
+define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(ptr addrspace(1) %arg) {
+  %load = load float, ptr addrspace(1) %arg, align 4
   %neg = fsub float -0.000000e+00, %load
   %div = fdiv float -1.000000e+00, %neg
-  store float %div, float addrspace(1)* %arg, align 4
+  store float %div, ptr addrspace(1) %arg, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
index b1af2df3d7241..b744eb651cc93 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
@@ -19,9 +19,9 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
 ; SI: v_cndmask_b32_e32
 ; SI: v_add_f64
 ; SI: s_endpgm
-define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
+define amdgpu_kernel void @ffloor_f64(ptr addrspace(1) %out, double %x) {
   %y = call fast double @llvm.floor.f64(double %x) nounwind readnone
-  store double %y, double addrspace(1)* %out
+  store double %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -34,10 +34,10 @@ define amdgpu_kernel void @ffloor_f64(double addrspace(1)* %out, double %x) {
 ; SI: v_cndmask_b32_e32
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[INPUT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
+define amdgpu_kernel void @ffloor_f64_neg(ptr addrspace(1) %out, double %x) {
   %neg = fsub nsz double 0.0, %x
   %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
-  store double %y, double addrspace(1)* %out
+  store double %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -50,20 +50,20 @@ define amdgpu_kernel void @ffloor_f64_neg(double addrspace(1)* %out, double %x)
 ; SI: v_cndmask_b32_e32
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|[[INPUT]]|
 ; SI: s_endpgm
-define amdgpu_kernel void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
+define amdgpu_kernel void @ffloor_f64_neg_abs(ptr addrspace(1) %out, double %x) {
   %abs = call fast double @llvm.fabs.f64(double %x)
   %neg = fsub nsz double 0.0, %abs
   %y = call fast double @llvm.floor.f64(double %neg) nounwind readnone
-  store double %y, double addrspace(1)* %out
+  store double %y, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}ffloor_v2f64:
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
-define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+define amdgpu_kernel void @ffloor_v2f64(ptr addrspace(1) %out, <2 x double> %x) {
   %y = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
-  store <2 x double> %y, <2 x double> addrspace(1)* %out
+  store <2 x double> %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -72,9 +72,9 @@ define amdgpu_kernel void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 ; CI-NOT: v_floor_f64_e32
-define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+define amdgpu_kernel void @ffloor_v3f64(ptr addrspace(1) %out, <3 x double> %x) {
   %y = call fast <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
-  store <3 x double> %y, <3 x double> addrspace(1)* %out
+  store <3 x double> %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -83,9 +83,9 @@ define amdgpu_kernel void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
-define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+define amdgpu_kernel void @ffloor_v4f64(ptr addrspace(1) %out, <4 x double> %x) {
   %y = call fast <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
-  store <4 x double> %y, <4 x double> addrspace(1)* %out
+  store <4 x double> %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -98,9 +98,9 @@ define amdgpu_kernel void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
-define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+define amdgpu_kernel void @ffloor_v8f64(ptr addrspace(1) %out, <8 x double> %x) {
   %y = call fast <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
-  store <8 x double> %y, <8 x double> addrspace(1)* %out
+  store <8 x double> %y, ptr addrspace(1) %out
   ret void
 }
 
@@ -121,8 +121,8 @@ define amdgpu_kernel void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x do
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
 ; CI: v_floor_f64_e32
-define amdgpu_kernel void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+define amdgpu_kernel void @ffloor_v16f64(ptr addrspace(1) %out, <16 x double> %x) {
   %y = call fast <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
-  store <16 x double> %y, <16 x double> addrspace(1)* %out
+  store <16 x double> %y, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.ll b/llvm/test/CodeGen/AMDGPU/ffloor.ll
index 720fe7a45e3df..c2f93c649a45e 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.ll
@@ -5,9 +5,9 @@
 ; FUNC-LABEL: {{^}}floor_f32:
 ; SI: v_floor_f32_e32
 ; R600: FLOOR
-define amdgpu_kernel void @floor_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @floor_f32(ptr addrspace(1) %out, float %in) {
   %tmp = call float @llvm.floor.f32(float %in) #0
-  store float %tmp, float addrspace(1)* %out
+  store float %tmp, ptr addrspace(1) %out
   ret void
 }
 
@@ -15,9 +15,9 @@ define amdgpu_kernel void @floor_f32(float addrspace(1)* %out, float %in) {
 ; SI: v_floor_f32_e32
 ; SI: v_floor_f32_e32
 
-define amdgpu_kernel void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @floor_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
   %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0
-  store <2 x float> %tmp, <2 x float> addrspace(1)* %out
+  store <2 x float> %tmp, ptr addrspace(1) %out
   ret void
 }
 
@@ -31,9 +31,9 @@ define amdgpu_kernel void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x floa
 ; R600: FLOOR
 ; R600: FLOOR
 ; R600: FLOOR
-define amdgpu_kernel void @floor_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @floor_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
   %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) #0
-  store <4 x float> %tmp, <4 x float> addrspace(1)* %out
+  store <4 x float> %tmp, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index 80308b52c8665..058b9784c9001 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -21,20 +21,20 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %mul, %c
-  store double %fma, double addrspace(1)* %gep.out
+  store double %fma, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -49,25 +49,25 @@ define amdgpu_kernel void @combine_to_fma_f64_0(double addrspace(1)* noalias %ou
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
-
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
-  %d = load volatile double, double addrspace(1)* %gep.3
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.out.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.out.1 = getelementptr double, ptr addrspace(1) %gep.out.0, i32 1
+
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
+  %d = load volatile double, ptr addrspace(1) %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fadd double %mul, %c
   %fma1 = fadd double %mul, %d
-  store volatile double %fma0, double addrspace(1)* %gep.out.0
-  store volatile double %fma1, double addrspace(1)* %gep.out.1
+  store volatile double %fma0, ptr addrspace(1) %gep.out.0
+  store volatile double %fma1, ptr addrspace(1) %gep.out.1
   ret void
 }
 
@@ -78,20 +78,20 @@ define amdgpu_kernel void @combine_to_fma_f64_0_2use(double addrspace(1)* noalia
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_f64_1(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %c, %mul
-  store double %fma, double addrspace(1)* %gep.out
+  store double %fma, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -102,20 +102,20 @@ define amdgpu_kernel void @combine_to_fma_f64_1(double addrspace(1)* noalias %ou
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %mul, %c
-  store double %fma, double addrspace(1)* %gep.out
+  store double %fma, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -130,25 +130,25 @@ define amdgpu_kernel void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalia
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
-
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
-  %d = load volatile double, double addrspace(1)* %gep.3
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.out.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.out.1 = getelementptr double, ptr addrspace(1) %gep.out.0, i32 1
+
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
+  %d = load volatile double, ptr addrspace(1) %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %mul, %c
   %fma1 = fsub double %mul, %d
-  store volatile double %fma0, double addrspace(1)* %gep.out.0
-  store volatile double %fma1, double addrspace(1)* %gep.out.1
+  store volatile double %fma0, ptr addrspace(1) %gep.out.0
+  store volatile double %fma1, ptr addrspace(1) %gep.out.1
   ret void
 }
 
@@ -159,20 +159,20 @@ define amdgpu_kernel void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* n
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %c, %mul
-  store double %fma, double addrspace(1)* %gep.out
+  store double %fma, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -187,25 +187,25 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalia
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
-
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
-  %d = load volatile double, double addrspace(1)* %gep.3
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.out.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.out.1 = getelementptr double, ptr addrspace(1) %gep.out.0, i32 1
+
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
+  %d = load volatile double, ptr addrspace(1) %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %c, %mul
   %fma1 = fsub double %d, %mul
-  store volatile double %fma0, double addrspace(1)* %gep.out.0
-  store volatile double %fma1, double addrspace(1)* %gep.out.1
+  store volatile double %fma0, ptr addrspace(1) %gep.out.0
+  store volatile double %fma1, ptr addrspace(1) %gep.out.1
   ret void
 }
 
@@ -216,22 +216,22 @@ define amdgpu_kernel void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* n
 ; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16 glc{{$}}
 ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
   %fma = fsub double %mul.neg, %c
 
-  store double %fma, double addrspace(1)* %gep.out
+  store double %fma, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -246,27 +246,27 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalia
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
-
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
-  %d = load volatile double, double addrspace(1)* %gep.3
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.out.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.out.1 = getelementptr double, ptr addrspace(1) %gep.out.0, i32 1
+
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
+  %d = load volatile double, ptr addrspace(1) %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
   %fma0 = fsub double %mul.neg, %c
   %fma1 = fsub double %mul.neg, %d
 
-  store volatile double %fma0, double addrspace(1)* %gep.out.0
-  store volatile double %fma1, double addrspace(1)* %gep.out.1
+  store volatile double %fma0, ptr addrspace(1) %gep.out.0
+  store volatile double %fma1, ptr addrspace(1) %gep.out.1
   ret void
 }
 
@@ -281,27 +281,27 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(
 ; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
-
-  %a = load volatile double, double addrspace(1)* %gep.0
-  %b = load volatile double, double addrspace(1)* %gep.1
-  %c = load volatile double, double addrspace(1)* %gep.2
-  %d = load volatile double, double addrspace(1)* %gep.3
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.out.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.out.1 = getelementptr double, ptr addrspace(1) %gep.out.0, i32 1
+
+  %a = load volatile double, ptr addrspace(1) %gep.0
+  %b = load volatile double, ptr addrspace(1) %gep.1
+  %c = load volatile double, ptr addrspace(1) %gep.2
+  %d = load volatile double, ptr addrspace(1) %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
   %fma0 = fsub double %mul.neg, %c
   %fma1 = fsub double %mul, %d
 
-  store volatile double %fma0, double addrspace(1)* %gep.out.0
-  store volatile double %fma1, double addrspace(1)* %gep.out.1
+  store volatile double %fma0, ptr addrspace(1) %gep.out.0
+  store volatile double %fma1, ptr addrspace(1) %gep.out.1
   ret void
 }
 
@@ -322,26 +322,26 @@ define amdgpu_kernel void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(
 ; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
-
-  %x = load volatile double, double addrspace(1)* %gep.0
-  %y = load volatile double, double addrspace(1)* %gep.1
-  %z = load volatile double, double addrspace(1)* %gep.2
-  %u = load volatile double, double addrspace(1)* %gep.3
-  %v = load volatile double, double addrspace(1)* %gep.4
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.4 = getelementptr double, ptr addrspace(1) %gep.0, i32 4
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
+
+  %x = load volatile double, ptr addrspace(1) %gep.0
+  %y = load volatile double, ptr addrspace(1) %gep.1
+  %z = load volatile double, ptr addrspace(1) %gep.2
+  %u = load volatile double, ptr addrspace(1) %gep.3
+  %v = load volatile double, ptr addrspace(1) %gep.4
 
   %tmp0 = fmul double %u, %v
   %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
   %tmp2 = fsub double %tmp1, %z
 
-  store double %tmp2, double addrspace(1)* %gep.out
+  store double %tmp2, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -363,27 +363,27 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_0_f64(double addrspace
 ; SI-UNSAFE: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
-  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
-  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
-
-  %x = load volatile double, double addrspace(1)* %gep.0
-  %y = load volatile double, double addrspace(1)* %gep.1
-  %z = load volatile double, double addrspace(1)* %gep.2
-  %u = load volatile double, double addrspace(1)* %gep.3
-  %v = load volatile double, double addrspace(1)* %gep.4
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr double, ptr addrspace(1) %gep.0, i32 2
+  %gep.3 = getelementptr double, ptr addrspace(1) %gep.0, i32 3
+  %gep.4 = getelementptr double, ptr addrspace(1) %gep.0, i32 4
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
+
+  %x = load volatile double, ptr addrspace(1) %gep.0
+  %y = load volatile double, ptr addrspace(1) %gep.1
+  %z = load volatile double, ptr addrspace(1) %gep.2
+  %u = load volatile double, ptr addrspace(1) %gep.3
+  %v = load volatile double, ptr addrspace(1) %gep.4
 
   ; nsz flag is needed since this combine may change sign of zero
   %tmp0 = fmul nsz double %u, %v
   %tmp1 = call nsz double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
   %tmp2 = fsub nsz double %x, %tmp1
 
-  store double %tmp2, double addrspace(1)* %gep.out
+  store double %tmp2, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -396,14 +396,14 @@ define amdgpu_kernel void @aggressive_combine_to_fma_fsub_1_f64(double addrspace
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
-                                        float addrspace(1)* %in1,
-                                        float addrspace(1)* %in2) {
-  %x = load volatile float, float addrspace(1)* %in1
-  %y = load volatile float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_add_x_one_y(ptr addrspace(1) %out,
+                                        ptr addrspace(1) %in1,
+                                        ptr addrspace(1) %in2) {
+  %x = load volatile float, ptr addrspace(1) %in1
+  %y = load volatile float, ptr addrspace(1) %in2
   %a = fadd float %x, 1.0
   %m = fmul float %a, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -412,14 +412,14 @@ define amdgpu_kernel void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
-                                        float addrspace(1)* %in1,
-                                        float addrspace(1)* %in2) {
-  %x = load volatile float, float addrspace(1)* %in1
-  %y = load volatile float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_add_x_one(ptr addrspace(1) %out,
+                                        ptr addrspace(1) %in1,
+                                        ptr addrspace(1) %in2) {
+  %x = load volatile float, ptr addrspace(1) %in1
+  %y = load volatile float, ptr addrspace(1) %in2
   %a = fadd float %x, 1.0
   %m = fmul float %y, %a
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -428,14 +428,14 @@ define amdgpu_kernel void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
-                                           float addrspace(1)* %in1,
-                                           float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_add_x_negone_y(ptr addrspace(1) %out,
+                                           ptr addrspace(1) %in1,
+                                           ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %a = fadd float %x, -1.0
   %m = fmul float %a, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -444,14 +444,14 @@ define amdgpu_kernel void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
-                                           float addrspace(1)* %in1,
-                                           float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_add_x_negone(ptr addrspace(1) %out,
+                                           ptr addrspace(1) %in1,
+                                           ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %a = fadd float %x, -1.0
   %m = fmul float %y, %a
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -460,14 +460,14 @@ define amdgpu_kernel void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
-                                        float addrspace(1)* %in1,
-                                        float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_sub_one_x_y(ptr addrspace(1) %out,
+                                        ptr addrspace(1) %in1,
+                                        ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float 1.0, %x
   %m = fmul float %s, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -476,14 +476,14 @@ define amdgpu_kernel void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
-                                        float addrspace(1)* %in1,
-                                        float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_sub_one_x(ptr addrspace(1) %out,
+                                        ptr addrspace(1) %in1,
+                                        ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float 1.0, %x
   %m = fmul float %y, %s
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -492,14 +492,14 @@ define amdgpu_kernel void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
-                                           float addrspace(1)* %in1,
-                                           float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(ptr addrspace(1) %out,
+                                           ptr addrspace(1) %in1,
+                                           ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float -1.0, %x
   %m = fmul float %s, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -508,14 +508,14 @@ define amdgpu_kernel void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, -[[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
-                                         float addrspace(1)* %in1,
-                                         float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(ptr addrspace(1) %out,
+                                         ptr addrspace(1) %in1,
+                                         ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float -1.0, %x
   %m = fmul float %y, %s
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -524,14 +524,14 @@ define amdgpu_kernel void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
-                                        float addrspace(1)* %in1,
-                                        float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_sub_x_one_y(ptr addrspace(1) %out,
+                                        ptr addrspace(1) %in1,
+                                        ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float %x, 1.0
   %m = fmul float %s, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -540,14 +540,14 @@ define amdgpu_kernel void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], -[[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
-                                      float addrspace(1)* %in1,
-                                      float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_sub_x_one(ptr addrspace(1) %out,
+                                      ptr addrspace(1) %in1,
+                                      ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float %x, 1.0
   %m = fmul float %y, %s
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -556,14 +556,14 @@ define amdgpu_kernel void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VS]], [[VY:v[0-9]]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
-                                         float addrspace(1)* %in1,
-                                         float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(ptr addrspace(1) %out,
+                                         ptr addrspace(1) %in1,
+                                         ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float %x, -1.0
   %m = fmul float %s, %y
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -572,14 +572,14 @@ define amdgpu_kernel void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
 ; SI-NOFMA: v_mul_f32_e32 {{v[0-9]}}, [[VY:v[0-9]]], [[VS]]
 ;
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VY:v[0-9]]], [[VY:v[0-9]]]
-define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
-                                         float addrspace(1)* %in1,
-                                         float addrspace(1)* %in2) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
+define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(ptr addrspace(1) %out,
+                                         ptr addrspace(1) %in1,
+                                         ptr addrspace(1) %in2) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
   %s = fsub float %x, -1.0
   %m = fmul float %y, %s
-  store float %m, float addrspace(1)* %out
+  store float %m, ptr addrspace(1) %out
   ret void
 }
 
@@ -594,18 +594,18 @@ define amdgpu_kernel void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
 ;
 ; SI-FMA: v_fma_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
 ; SI-FMA: v_fma_f32 {{v[0-9]}}, [[VX:v[0-9]]], [[VT]], [[VR]]
-define amdgpu_kernel void @test_f32_interp(float addrspace(1)* %out,
-                             float addrspace(1)* %in1,
-                             float addrspace(1)* %in2,
-                             float addrspace(1)* %in3) {
-  %x = load float, float addrspace(1)* %in1
-  %y = load float, float addrspace(1)* %in2
-  %t = load float, float addrspace(1)* %in3
+define amdgpu_kernel void @test_f32_interp(ptr addrspace(1) %out,
+                             ptr addrspace(1) %in1,
+                             ptr addrspace(1) %in2,
+                             ptr addrspace(1) %in3) {
+  %x = load float, ptr addrspace(1) %in1
+  %y = load float, ptr addrspace(1) %in2
+  %t = load float, ptr addrspace(1) %in3
   %t1 = fsub float 1.0, %t
   %tx = fmul float %x, %t
   %ty = fmul float %y, %t1
   %r = fadd float %tx, %ty
-  store float %r, float addrspace(1)* %out
+  store float %r, ptr addrspace(1) %out
   ret void
 }
 
@@ -616,18 +616,18 @@ define amdgpu_kernel void @test_f32_interp(float addrspace(1)* %out,
 ;
 ; SI-FMA: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
 ; SI-FMA: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
-define amdgpu_kernel void @test_f64_interp(double addrspace(1)* %out,
-                             double addrspace(1)* %in1,
-                             double addrspace(1)* %in2,
-                             double addrspace(1)* %in3) {
-  %x = load double, double addrspace(1)* %in1
-  %y = load double, double addrspace(1)* %in2
-  %t = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @test_f64_interp(ptr addrspace(1) %out,
+                             ptr addrspace(1) %in1,
+                             ptr addrspace(1) %in2,
+                             ptr addrspace(1) %in3) {
+  %x = load double, ptr addrspace(1) %in1
+  %y = load double, ptr addrspace(1) %in2
+  %t = load double, ptr addrspace(1) %in3
   %t1 = fsub double 1.0, %t
   %tx = fmul double %x, %t
   %ty = fmul double %y, %t1
   %r = fadd double %tx, %ty
-  store double %r, double addrspace(1)* %out
+  store double %r, ptr addrspace(1) %out
   ret void
 }
 
@@ -638,19 +638,19 @@ define amdgpu_kernel void @test_f64_interp(double addrspace(1)* %out,
 ; SI-NOT: [[A]]
 ; SI-NOT: [[B]]
 ; SI: v_fma_f32 v{{[0-9]+}}, [[A]], 2.0, [[B]]
-define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fma.f32(float -2.0, float %r1.fneg, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -660,19 +660,19 @@ define amdgpu_kernel void @fma_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
 ; SI-NOT: [[A]]
 ; SI-NOT: [[B]]
 ; SI: v_fma_f32 v{{[0-9]+}}, [[A]], -2.0, [[B]]
-define amdgpu_kernel void @fma_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fma_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fma.f32(float 2.0, float %r1.fneg, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -681,22 +681,22 @@ define amdgpu_kernel void @fma_2.0_neg_a_b_f32(float addrspace(1)* %out, float a
 ; SI: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 ; SI: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 ; SI: v_fma_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
-define amdgpu_kernel void @fma_neg_b_c_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #2 {
+define amdgpu_kernel void @fma_neg_b_c_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr <4 x float>, <4 x float> addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %gep.1, i32 2
-  %gep.out = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr <4 x float>, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr <4 x float>, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr <4 x float>, ptr addrspace(1) %gep.1, i32 2
+  %gep.out = getelementptr <4 x float>, ptr addrspace(1) %out, i32 %tid
 
-  %tmp0 = load <4 x float>, <4 x float> addrspace(1)* %gep.0
-  %tmp1 = load <4 x float>, <4 x float> addrspace(1)* %gep.1
-  %tmp2 = load <4 x float>, <4 x float> addrspace(1)* %gep.2
+  %tmp0 = load <4 x float>, ptr addrspace(1) %gep.0
+  %tmp1 = load <4 x float>, ptr addrspace(1) %gep.1
+  %tmp2 = load <4 x float>, ptr addrspace(1) %gep.2
 
   %fneg0 = fneg fast <4 x float> %tmp0
   %fneg1 = fneg fast <4 x float> %tmp1
   %fma0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %tmp2, <4 x float> %fneg0, <4 x float> %fneg1)
 
-  store <4 x float> %fma0, <4 x float> addrspace(1)* %gep.out
+  store <4 x float> %fma0, ptr addrspace(1) %gep.out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fma.f64.ll b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
index fa7d9021b628f..75bc89ffc39c0 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
@@ -10,13 +10,13 @@ declare double @llvm.fabs.f64(double) nounwind readnone
 ; FUNC-LABEL: {{^}}fma_f64:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
@@ -25,13 +25,13 @@ define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1
 ; SI: v_fma_f64
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
-                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
-   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
-   %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
+define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load <2 x double>, ptr addrspace(1) %in1
+   %r1 = load <2 x double>, ptr addrspace(1) %in2
+   %r2 = load <2 x double>, ptr addrspace(1) %in3
    %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
-   store <2 x double> %r3, <2 x double> addrspace(1)* %out
+   store <2 x double> %r3, ptr addrspace(1) %out
    ret void
 }
 
@@ -44,171 +44,171 @@ define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x doubl
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
-                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
-   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
-   %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
+define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load <4 x double>, ptr addrspace(1) %in1
+   %r1 = load <4 x double>, ptr addrspace(1) %in2
+   %r2 = load <4 x double>, ptr addrspace(1) %in3
    %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
-   store <4 x double> %r3, <4 x double> addrspace(1)* %out
+   store <4 x double> %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_src0:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_abs_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r0)
    %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_src1:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_abs_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r1)
    %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_src2:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
 ; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
-define amdgpu_kernel void @fma_f64_abs_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r2)
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_neg_src0:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fsub = fsub double -0.000000e+00, %r0
    %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_neg_src1:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fsub = fsub double -0.000000e+00, %r1
    %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_neg_src2:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fsub = fsub double -0.000000e+00, %r2
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_abs_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r0)
    %fsub = fsub double -0.000000e+00, %fabs
    %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_abs_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r1)
    %fsub = fsub double -0.000000e+00, %fabs
    %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
 ; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
-define amdgpu_kernel void @fma_f64_abs_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %fabs = call double @llvm.fabs.f64(double %r2)
    %fsub = fsub double -0.000000e+00, %fabs
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src0:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_lit_src0(double addrspace(1)* %out,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r1 = load double, double addrspace(1)* %in2
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+   %r1 = load double, ptr addrspace(1) %in2
+   %r2 = load double, ptr addrspace(1) %in3
    %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src1:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fma_f64_lit_src1(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in3) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in3) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r2 = load double, ptr addrspace(1) %in3
    %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fma_f64_lit_src2:
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
 ; GFX90A: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
-define amdgpu_kernel void @fma_f64_lit_src2(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
+define amdgpu_kernel void @fma_f64_lit_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
-   store double %r3, double addrspace(1)* %out
+   store double %r3, ptr addrspace(1) %out
    ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fma.ll b/llvm/test/CodeGen/AMDGPU/fma.ll
index dcf3254242f4c..b1db04a7fd886 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.ll
@@ -21,13 +21,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
 ; EG: FMA {{\*? *}}[[RES]]
-define amdgpu_kernel void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
-                     float addrspace(1)* %in2, float addrspace(1)* %in3) {
-  %r0 = load float, float addrspace(1)* %in1
-  %r1 = load float, float addrspace(1)* %in2
-  %r2 = load float, float addrspace(1)* %in3
+define amdgpu_kernel void @fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+  %r0 = load float, ptr addrspace(1) %in1
+  %r1 = load float, ptr addrspace(1) %in2
+  %r2 = load float, ptr addrspace(1) %in3
   %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
-  store float %r3, float addrspace(1)* %out
+  store float %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -48,13 +48,13 @@ define float @fmac_to_3addr_f32(float %r0, float %r1, float %r2) {
 ; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
 ; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
 ; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
-define amdgpu_kernel void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
-                       <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
-  %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1
-  %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2
-  %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3
+define amdgpu_kernel void @fma_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+  %r0 = load <2 x float>, ptr addrspace(1) %in1
+  %r1 = load <2 x float>, ptr addrspace(1) %in2
+  %r2 = load <2 x float>, ptr addrspace(1) %in3
   %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
-  store <2 x float> %r3, <2 x float> addrspace(1)* %out
+  store <2 x float> %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -73,44 +73,44 @@ define amdgpu_kernel void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float>
 ; EG-DAG: FMA {{\*? *}}[[RES]].Y
 ; EG-DAG: FMA {{\*? *}}[[RES]].Z
 ; EG-DAG: FMA {{\*? *}}[[RES]].W
-define amdgpu_kernel void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
-                       <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
-  %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1
-  %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2
-  %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3
+define amdgpu_kernel void @fma_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                       ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
+  %r0 = load <4 x float>, ptr addrspace(1) %in1
+  %r1 = load <4 x float>, ptr addrspace(1) %in2
+  %r2 = load <4 x float>, ptr addrspace(1) %in3
   %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
-  store <4 x float> %r3, <4 x float> addrspace(1)* %out
+  store <4 x float> %r3, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}}
-define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
-  store float %fma, float addrspace(1)* %out.gep, align 4
+  store float %fma, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
 ; FUNC-LABEL: @fma_commute_mul_s_f32
-define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
+define amdgpu_kernel void @fma_commute_mul_s_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b, float %b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %c = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %c = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
-  store float %fma, float addrspace(1)* %out.gep, align 4
+  store float %fma, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -123,15 +123,15 @@ define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %ou
 
 ; GFX906: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
 ; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind {
+define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext
-  %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %tmp = load volatile float, float addrspace(1)* %gep.a
-  %tmp1 = load volatile float, float addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds float, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds float, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.out = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %tmp = load volatile float, ptr addrspace(1) %gep.a
+  %tmp1 = load volatile float, ptr addrspace(1) %gep.b
   %tmp2 = fadd contract float %tmp, %tmp
   %tmp3 = fmul contract float %tmp2, 4.0
   %tmp4 = fsub contract float 1.0, %tmp3
@@ -141,7 +141,7 @@ bb:
   %tmp8 = fsub contract float 1.0, %tmp7
   %tmp9 = fmul contract float %tmp8, 8.0
   %tmp10 = fadd contract float %tmp5, %tmp9
-  store float %tmp10, float addrspace(1)* %gep.out
+  store float %tmp10, ptr addrspace(1) %gep.out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index b186290cf40f8..4addfd3c9ab1f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -4,7 +4,7 @@
 ; GCN-LABEL: {{^}}addMul2D:
 ; GFX1010: v_fmac_f16
 ; GFX1010: v_fmac_f16
-define hidden <4 x half> @addMul2D(<4 x i8>* nocapture readonly %arg, float addrspace(4)* nocapture readonly %arg1, <2 x i32> %arg2, i32 %arg3) local_unnamed_addr #0 {
+define hidden <4 x half> @addMul2D(ptr nocapture readonly %arg, ptr addrspace(4) nocapture readonly %arg1, <2 x i32> %arg2, i32 %arg3) local_unnamed_addr #0 {
 bb:
   %tmp = extractelement <2 x i32> %arg2, i64 1
   %tmp4 = icmp sgt i32 %tmp, 0
@@ -30,13 +30,13 @@ bb14:                                             ; preds = %bb14, %bb11
   %tmp16 = phi i32 [ 0, %bb11 ], [ %tmp30, %bb14 ]
   %tmp17 = add nsw i32 %tmp16, %tmp12
   %tmp18 = sext i32 %tmp17 to i64
-  %tmp19 = getelementptr inbounds <4 x i8>, <4 x i8>* %arg, i64 %tmp18
-  %tmp20 = load <4 x i8>, <4 x i8>* %tmp19, align 4
+  %tmp19 = getelementptr inbounds <4 x i8>, ptr %arg, i64 %tmp18
+  %tmp20 = load <4 x i8>, ptr %tmp19, align 4
   %tmp21 = tail call <4 x half> @_Z13convert_half4Dv4_h(<4 x i8> %tmp20)
   %tmp22 = add nsw i32 %tmp16, %tmp13
   %tmp23 = sext i32 %tmp22 to i64
-  %tmp24 = getelementptr inbounds float, float addrspace(4)* %arg1, i64 %tmp23
-  %tmp25 = load float, float addrspace(4)* %tmp24, align 4
+  %tmp24 = getelementptr inbounds float, ptr addrspace(4) %arg1, i64 %tmp23
+  %tmp25 = load float, ptr addrspace(4) %tmp24, align 4
   %tmp26 = fptrunc float %tmp25 to half
   %tmp27 = insertelement <4 x half> undef, half %tmp26, i32 0
   %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
index 7e82cc139e0a2..fd6e461d9e0a4 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -14,14 +14,14 @@ declare double @llvm.maxnum.f64(double, double) nounwind readnone
 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[MAX0]], [[QUIET_C]]
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
-  %bptr = getelementptr double, double addrspace(1)* %aptr, i32 1
-  %cptr = getelementptr double, double addrspace(1)* %aptr, i32 2
-  %a = load volatile double, double addrspace(1)* %aptr, align 8
-  %b = load volatile double, double addrspace(1)* %bptr, align 8
-  %c = load volatile double, double addrspace(1)* %cptr, align 8
+define amdgpu_kernel void @test_fmax3_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
+  %bptr = getelementptr double, ptr addrspace(1) %aptr, i32 1
+  %cptr = getelementptr double, ptr addrspace(1) %aptr, i32 2
+  %a = load volatile double, ptr addrspace(1) %aptr, align 8
+  %b = load volatile double, ptr addrspace(1) %bptr, align 8
+  %c = load volatile double, ptr addrspace(1) %cptr, align 8
   %f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
   %f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
-  store double %f1, double addrspace(1)* %out, align 8
+  store double %f1, ptr addrspace(1) %out, align 8
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index a3194a749be0f..e9dc37a4cada0 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -9,13 +9,13 @@
 ; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
-  %a = load volatile  float, float addrspace(1)* %aptr, align 4
-  %b = load volatile float, float addrspace(1)* %bptr, align 4
-  %c = load volatile float, float addrspace(1)* %cptr, align 4
+define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %a = load volatile  float, ptr addrspace(1) %aptr, align 4
+  %b = load volatile float, ptr addrspace(1) %bptr, align 4
+  %c = load volatile float, ptr addrspace(1) %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b)
   %f1 = call float @llvm.maxnum.f32(float %f0, float %c)
-  store float %f1, float addrspace(1)* %out, align 4
+  store float %f1, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -27,13 +27,13 @@ define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float
 ; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 {
-  %a = load volatile float, float addrspace(1)* %aptr, align 4
-  %b = load volatile float, float addrspace(1)* %bptr, align 4
-  %c = load volatile float, float addrspace(1)* %cptr, align 4
+define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %a = load volatile float, ptr addrspace(1) %aptr, align 4
+  %b = load volatile float, ptr addrspace(1) %bptr, align 4
+  %c = load volatile float, ptr addrspace(1) %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b)
   %f1 = call float @llvm.maxnum.f32(float %c, float %f0)
-  store float %f1, float addrspace(1)* %out, align 4
+  store float %f1, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -56,13 +56,13 @@ define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float
 
 ; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
 ; GCN: buffer_store_short [[RESULT]],
-define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
-  %a = load volatile half, half addrspace(1)* %aptr, align 2
-  %b = load volatile half, half addrspace(1)* %bptr, align 2
-  %c = load volatile half, half addrspace(1)* %cptr, align 2
+define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %a = load volatile half, ptr addrspace(1) %aptr, align 2
+  %b = load volatile half, ptr addrspace(1) %bptr, align 2
+  %c = load volatile half, ptr addrspace(1) %cptr, align 2
   %f0 = call half @llvm.maxnum.f16(half %a, half %b)
   %f1 = call half @llvm.maxnum.f16(half %f0, half %c)
-  store half %f1, half addrspace(1)* %out, align 2
+  store half %f1, ptr addrspace(1) %out, align 2
   ret void
 }
 
@@ -86,13 +86,13 @@ define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half ad
 
 ; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGA]], [[REGB]]
 ; GCN: buffer_store_short [[RESULT]],
-define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 {
-  %a = load volatile half, half addrspace(1)* %aptr, align 2
-  %b = load volatile half, half addrspace(1)* %bptr, align 2
-  %c = load volatile half, half addrspace(1)* %cptr, align 2
+define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %a = load volatile half, ptr addrspace(1) %aptr, align 2
+  %b = load volatile half, ptr addrspace(1) %bptr, align 2
+  %c = load volatile half, ptr addrspace(1) %cptr, align 2
   %f0 = call half @llvm.maxnum.f16(half %a, half %b)
   %f1 = call half @llvm.maxnum.f16(half %c, half %f0)
-  store half %f1, half addrspace(1)* %out, align 2
+  store half %f1, ptr addrspace(1) %out, align 2
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
index 2ca90378571af..01b2f207388e8 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
@@ -4,7 +4,7 @@
 
 ; Make sure we don't try to form FMAX_LEGACY nodes with f64
 
-define amdgpu_kernel void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_uge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: test_fmax_legacy_uge_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -44,19 +44,19 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, d
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
 ; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load double, double addrspace(1)* %gep.0, align 8
-  %b = load double, double addrspace(1)* %gep.1, align 8
+  %a = load double, ptr addrspace(1) %gep.0, align 8
+  %b = load double, ptr addrspace(1) %gep.1, align 8
 
   %cmp = fcmp uge double %a, %b
   %val = select i1 %cmp, double %a, double %b
-  store double %val, double addrspace(1)* %out, align 8
+  store double %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
-define amdgpu_kernel void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_oge_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: test_fmax_legacy_oge_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -96,19 +96,19 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, d
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
 ; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load double, double addrspace(1)* %gep.0, align 8
-  %b = load double, double addrspace(1)* %gep.1, align 8
+  %a = load double, ptr addrspace(1) %gep.0, align 8
+  %b = load double, ptr addrspace(1) %gep.1, align 8
 
   %cmp = fcmp oge double %a, %b
   %val = select i1 %cmp, double %a, double %b
-  store double %val, double addrspace(1)* %out, align 8
+  store double %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
-define amdgpu_kernel void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ugt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: test_fmax_legacy_ugt_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -148,19 +148,19 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, d
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
 ; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load double, double addrspace(1)* %gep.0, align 8
-  %b = load double, double addrspace(1)* %gep.1, align 8
+  %a = load double, ptr addrspace(1) %gep.0, align 8
+  %b = load double, ptr addrspace(1) %gep.1, align 8
 
   %cmp = fcmp ugt double %a, %b
   %val = select i1 %cmp, double %a, double %b
-  store double %val, double addrspace(1)* %out, align 8
+  store double %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
-define amdgpu_kernel void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: test_fmax_legacy_ogt_f64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -200,15 +200,15 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, d
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
 ; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load double, double addrspace(1)* %gep.0, align 8
-  %b = load double, double addrspace(1)* %gep.1, align 8
+  %a = load double, ptr addrspace(1) %gep.0, align 8
+  %b = load double, ptr addrspace(1) %gep.1, align 8
 
   %cmp = fcmp ogt double %a, %b
   %val = select i1 %cmp, double %a, double %b
-  store double %val, double addrspace(1)* %out, align 8
+  store double %val, ptr addrspace(1) %out, align 8
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index b3ae7e0530bb3..2b08f8efd2bcf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -20,17 +20,17 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %cmp = fcmp uge float %a, %b
   %val = select i1 %cmp, float %a, float %b
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -48,19 +48,19 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, fl
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
 
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
   %a.nnan = fadd nnan float %a, 1.0
   %b.nnan = fadd nnan float %b, 2.0
 
   %cmp = fcmp uge float %a.nnan, %b.nnan
   %val = select i1 %cmp, float %a.nnan, float %b.nnan
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -75,17 +75,17 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)*
 
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %cmp = fcmp oge float %a, %b
   %val = select i1 %cmp, float %a, float %b
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -101,17 +101,17 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, fl
 
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %cmp = fcmp ugt float %a, %b
   %val = select i1 %cmp, float %a, float %b
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -126,17 +126,17 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, fl
 
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -152,17 +152,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, fl
 
 ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.0
-  %b = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.1
+  %a = load volatile <1 x float>, ptr addrspace(1) %gep.0
+  %b = load volatile <1 x float>, ptr addrspace(1) %gep.1
 
   %cmp = fcmp ogt <1 x float> %a, %b
   %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
-  store <1 x float> %val, <1 x float> addrspace(1)* %out
+  store <1 x float> %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -185,17 +185,17 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)*
 ; GCN-NONAN: v_max_f32_e32
 
 ; GCN-NOT: v_max
-define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
-  %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
+  %a = load <3 x float>, ptr addrspace(1) %gep.0
+  %b = load <3 x float>, ptr addrspace(1) %gep.1
 
   %cmp = fcmp ogt <3 x float> %a, %b
   %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
-  store <3 x float> %val, <3 x float> addrspace(1)* %out
+  store <3 x float> %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -208,18 +208,18 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)*
 ; GCN-NOT: v_max_
 
 ; EG: MAX
-define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
-  store float %val, float addrspace(1)* %out0, align 4
-  store i1 %cmp, i1addrspace(1)* %out1
+  store float %val, ptr addrspace(1) %out0, align 4
+  store i1 %cmp, ptr addrspace(1) %out1
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
index 20af278bf98c3..00e6023d057d2 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
@@ -9,18 +9,18 @@ declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
 
 ; FUNC-LABEL: @test_fmax_f64
 ; SI: v_max_f64
-define amdgpu_kernel void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
+define amdgpu_kernel void @test_fmax_f64(ptr addrspace(1) %out, double %a, double %b) nounwind {
   %val = call double @llvm.maxnum.f64(double %a, double %b) #0
-  store double %val, double addrspace(1)* %out, align 8
+  store double %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; FUNC-LABEL: @test_fmax_v2f64
 ; SI: v_max_f64
 ; SI: v_max_f64
-define amdgpu_kernel void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
+define amdgpu_kernel void @test_fmax_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind {
   %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
-  store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
+  store <2 x double> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -29,9 +29,9 @@ define amdgpu_kernel void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x
 ; SI: v_max_f64
 ; SI: v_max_f64
 ; SI: v_max_f64
-define amdgpu_kernel void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
+define amdgpu_kernel void @test_fmax_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind {
   %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
-  store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
+  store <4 x double> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -44,9 +44,9 @@ define amdgpu_kernel void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x
 ; SI: v_max_f64
 ; SI: v_max_f64
 ; SI: v_max_f64
-define amdgpu_kernel void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
+define amdgpu_kernel void @test_fmax_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind {
   %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
-  store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
+  store <8 x double> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -67,9 +67,9 @@ define amdgpu_kernel void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x
 ; SI: v_max_f64
 ; SI: v_max_f64
 ; SI: v_max_f64
-define amdgpu_kernel void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
+define amdgpu_kernel void @test_fmax_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind {
   %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
-  store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
+  store <16 x double> %val, ptr addrspace(1) %out, align 128
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index b37bfbe30d594..229e85a08afe7 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -7,9 +7,9 @@
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
 ; GCN-NOT: [[RESULT]]
 ; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(ptr addrspace(1) %out, float %a, float %b) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float %b) #1
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -24,9 +24,9 @@ define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 {
 ; GCN-LABEL: {{^}}test_fmax_v2f32:
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
-  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -35,9 +35,9 @@ define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN-NOT: v_max_f32
-define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, <3 x float> %b) nounwind {
+define amdgpu_kernel void @test_fmax_v3f32(ptr addrspace(1) %out, <3 x float> %a, <3 x float> %b) nounwind {
   %val = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %a, <3 x float> %b) #0
-  store <3 x float> %val, <3 x float> addrspace(1)* %out, align 16
+  store <3 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -46,9 +46,9 @@ define amdgpu_kernel void @test_fmax_v3f32(<3 x float> addrspace(1)* %out, <3 x
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
   %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -61,9 +61,9 @@ define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
-  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -84,9 +84,9 @@ define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
 ; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
   %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
-  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  store <16 x float> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -94,9 +94,9 @@ define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -104,9 +104,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -114,9 +114,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %o
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -124,9 +124,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %o
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -134,9 +134,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %o
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -144,9 +144,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -154,9 +154,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -164,9 +164,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out
 ; GCN-NOT: v_max_f32_e32
 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: buffer_store_dword [[REG]]
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 8230fbaf8b866..4e44a9177de13 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -27,7 +27,7 @@ declare float @llvm.fabs.f32(float) #1
 ; GFX8_10: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI:      v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; GFX10:   v_fma_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
-define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
+define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
   %a11 = fadd float %y, -1.0
   %a12 = call float @llvm.fabs.f32(float %a11)
   %a13 = fadd float %x, -1.0
@@ -38,7 +38,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out,
   %a18 = fmul float %a17, %a17
   %a19 = fmul float %a18, %a17
   %a20 = fsub float 1.0, %a19
-  store float %a20, float addrspace(1)* %out
+  store float %a20, ptr addrspace(1) %out
   ret void
 }
 
@@ -49,12 +49,12 @@ define amdgpu_kernel void @multiple_fadd_use_test_f32(float addrspace(1)* %out,
 ; GCN-DAG:   buffer_store_{{dword|b32}} [[MUL2]]
 ; GCN-DAG:   buffer_store_{{dword|b32}} [[MAD]]
 ; GCN:       s_endpgm
-define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out, float %x, [8 x i32], float %y) #0 {
-  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+define amdgpu_kernel void @multiple_use_fadd_fmac_f32(ptr addrspace(1) %out, float %x, [8 x i32], float %y) #0 {
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast float %x, 2.0
   %mad = fadd fast float %mul2, %y
-  store volatile float %mul2, float addrspace(1)* %out
-  store volatile float %mad, float addrspace(1)* %out.gep.1
+  store volatile float %mul2, ptr addrspace(1) %out
+  store volatile float %mad, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -65,13 +65,13 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f32(float addrspace(1)* %out,
 ; GCN-DAG:   buffer_store_{{dword|b32}} [[MUL2]]
 ; GCN-DAG:   buffer_store_{{dword|b32}} [[MAD]]
 ; GCN:       s_endpgm
-define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out, float %x, float %y) #0 {
-  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+define amdgpu_kernel void @multiple_use_fadd_fmad_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
   %x.abs = call float @llvm.fabs.f32(float %x)
   %mul2 = fmul fast float %x.abs, 2.0
   %mad = fadd fast float %mul2, %y
-  store volatile float %mul2, float addrspace(1)* %out
-  store volatile float %mad, float addrspace(1)* %out.gep.1
+  store volatile float %mul2, ptr addrspace(1) %out
+  store volatile float %mad, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -80,14 +80,14 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f32(float addrspace(1)* %out,
 ; SIVI:  v_mad_f32 {{v[0-9]+}}, |[[X]]|, 2.0, v{{[0-9]+}}
 ; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, {{s[0-9]+}}
 ; GFX10: v_fma_f32 {{v[0-9]+}}, |[[X]]|, 2.0, {{s[0-9]+}}
-define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
-  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
   %x.abs = call float @llvm.fabs.f32(float %x)
   %mul2 = fmul fast float %x.abs, 2.0
   %mad0 = fadd fast float %mul2, %y
   %mad1 = fadd fast float %mul2, %z
-  store volatile float %mad0, float addrspace(1)* %out
-  store volatile float %mad1, float addrspace(1)* %out.gep.1
+  store volatile float %mad0, ptr addrspace(1) %out
+  store volatile float %mad1, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -95,12 +95,12 @@ define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f32(float addrspace(1)*
 ; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
 ; GCN: buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, float %y) #0 {
-  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+define amdgpu_kernel void @fmul_x2_xn2_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast float %x, 2.0
   %muln2 = fmul fast float %x, -2.0
   %mul = fmul fast float %mul2, %muln2
-  store volatile float %mul, float addrspace(1)* %out
+  store volatile float %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -110,12 +110,12 @@ define amdgpu_kernel void @fmul_x2_xn2_f32(float addrspace(1)* %out, float %x, f
 ; GFX10: v_mul_f32_e64 [[TMP0:v[0-9]+]], 0xc0c00000, [[X:s[0-9]+]]
 ; GCN:   v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
 ; GCN:   buffer_store_{{dword|b32}} [[RESULT]]
-define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, float %y) #0 {
-  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast float %x, 2.0
   %muln2 = fmul fast float %x, -3.0
   %mul = fmul fast float %mul2, %muln2
-  store volatile float %mul, float addrspace(1)* %out
+  store volatile float %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -130,7 +130,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(float addrspace(1)* %out, float %x, f
 ; VI-DENORM:    v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; GFX10-FLUSH:  v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
-define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
+define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
   %z = bitcast i16 %z.arg to half
@@ -144,7 +144,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
   %a18 = fmul half %a17, %a17
   %a19 = fmul half %a18, %a17
   %a20 = fsub half 1.0, %a19
-  store half %a20, half addrspace(1)* %out
+  store half %a20, ptr addrspace(1) %out
   ret void
 }
 
@@ -159,14 +159,14 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(half addrspace(1)* %out, i
 ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
 ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
-  %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+  %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast half %x, 2.0
   %mad = fadd fast half %mul2, %y
-  store volatile half %mul2, half addrspace(1)* %out
-  store volatile half %mad, half addrspace(1)* %out.gep.1
+  store volatile half %mul2, ptr addrspace(1) %out
+  store volatile half %mad, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -181,15 +181,15 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(half addrspace(1)* %out, i
 ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
 ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
-  %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+  %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
   %x.abs = call half @llvm.fabs.f16(half %x)
   %mul2 = fmul fast half %x.abs, 2.0
   %mad = fadd fast half %mul2, %y
-  store volatile half %mul2, half addrspace(1)* %out
-  store volatile half %mad, half addrspace(1)* %out.gep.1
+  store volatile half %mul2, ptr addrspace(1) %out
+  store volatile half %mad, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -206,17 +206,17 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(half addrspace(1)* %out, i
 ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}}
 ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}}
 
-define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
+define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
   %z = bitcast i16 %z.arg to half
-  %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+  %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
   %x.abs = call half @llvm.fabs.f16(half %x)
   %mul2 = fmul fast half %x.abs, 2.0
   %mad0 = fadd fast half %mul2, %y
   %mad1 = fadd fast half %mul2, %z
-  store volatile half %mad0, half addrspace(1)* %out
-  store volatile half %mad1, half addrspace(1)* %out.gep.1
+  store volatile half %mad0, ptr addrspace(1) %out
+  store volatile half %mad1, ptr addrspace(1) %out.gep.1
   ret void
 }
 
@@ -224,14 +224,14 @@ define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(half addrspace(1)* %
 ; GCN: v_mul_f16_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
 ; GCN: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
 ; GCN: buffer_store_{{short|b16}} [[RESULT]]
-define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @fmul_x2_xn2_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
-  %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+  %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast half %x, 2.0
   %muln2 = fmul fast half %x, -2.0
   %mul = fmul fast half %mul2, %muln2
-  store volatile half %mul, half addrspace(1)* %out
+  store volatile half %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -241,14 +241,14 @@ define amdgpu_kernel void @fmul_x2_xn2_f16(half addrspace(1)* %out, i16 zeroext
 ; GFX10: v_mul_f16_e64 [[TMP0:v[0-9]+]], 0xc600, [[X:s[0-9]+]]
 ; GCN:   v_mul_f16_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
 ; GCN:   buffer_store_{{short|b16}} [[RESULT]]
-define amdgpu_kernel void @fmul_x2_xn3_f16(half addrspace(1)* %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
+define amdgpu_kernel void @fmul_x2_xn3_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg) #0 {
   %x = bitcast i16 %x.arg to half
   %y = bitcast i16 %y.arg to half
-  %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+  %out.gep.1 = getelementptr half, ptr addrspace(1) %out, i32 1
   %mul2 = fmul fast half %x, 2.0
   %muln2 = fmul fast half %x, -3.0
   %mul = fmul fast half %mul2, %muln2
-  store volatile half %mul, half addrspace(1)* %out
+  store volatile half %mul, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
index 98ab1b2e7694e..db9f8b50f2c6f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -13,14 +13,14 @@
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
-  %b.val = load volatile half, half addrspace(1)* %b
+  %a.val = load volatile half, ptr addrspace(1) %a
+  %b.val = load volatile half, ptr addrspace(1) %b
   %r.val = fmul half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -33,12 +33,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_f16_imm_a(
-    half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load volatile half, half addrspace(1)* %b
+  %b.val = load volatile half, ptr addrspace(1) %b
   %r.val = fmul half 3.0, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -52,12 +52,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_f16_imm_b(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
+  %a.val = load volatile half, ptr addrspace(1) %a
   %r.val = fmul half %a.val, 4.0
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -89,14 +89,14 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
   %r.val = fmul <2 x half> %a.val, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -124,12 +124,12 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_v2f16_imm_a(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %b.val = load <2 x half>, ptr addrspace(1) %b
   %r.val = fmul <2 x half> <half 3.0, half 4.0>, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -156,12 +156,12 @@ entry:
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fmul_v2f16_imm_b(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = fmul <2 x half> %a.val, <half 4.0, half 3.0>
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -182,14 +182,14 @@ entry:
 ; VI: v_or_b32
 ; VI: v_or_b32
 define amdgpu_kernel void @fmul_v4f16(
-    <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %a,
-    <4 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load <4 x half>, <4 x half> addrspace(1)* %a
-  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+  %a.val = load <4 x half>, ptr addrspace(1) %a
+  %b.val = load <4 x half>, ptr addrspace(1) %b
   %r.val = fmul <4 x half> %a.val, %b.val
-  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+  store <4 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -214,11 +214,11 @@ entry:
 
 ; VI: buffer_store_dwordx2 v[[[OR0]]:[[OR1]]]
 define amdgpu_kernel void @fmul_v4f16_imm_a(
-    <4 x half> addrspace(1)* %r,
-    <4 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load <4 x half>, <4 x half> addrspace(1)* %b
+  %b.val = load <4 x half>, ptr addrspace(1) %b
   %r.val = fmul <4 x half> <half 8.0, half 2.0, half 3.0, half 4.0>, %b.val
-  store <4 x half> %r.val, <4 x half> addrspace(1)* %r
+  store <4 x half> %r.val, ptr addrspace(1) %r
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul.ll b/llvm/test/CodeGen/AMDGPU/fmul.ll
index 125de7aabfd4c..b11e9d43b876c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul.ll
@@ -6,10 +6,10 @@
 ; GCN: v_mul_f32
 
 ; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-define amdgpu_kernel void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
+define amdgpu_kernel void @fmul_f32(ptr addrspace(1) %out, float %a, float %b) {
 entry:
   %0 = fmul float %a, %b
-  store float %0, float addrspace(1)* %out
+  store float %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -19,10 +19,10 @@ entry:
 
 ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
 ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+define amdgpu_kernel void @fmul_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) {
 entry:
   %0 = fmul <2 x float> %a, %b
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  store <2 x float> %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -36,12 +36,12 @@ entry:
 ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-define amdgpu_kernel void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1) * %in
-  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
+define amdgpu_kernel void @fmul_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr
   %result = fmul <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -49,10 +49,10 @@ define amdgpu_kernel void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GCN: v_mul_f32
 ; GCN-NOT: v_mul_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @test_mul_2_k(ptr addrspace(1) %out, float %x) #0 {
   %y = fmul float %x, 2.0
   %z = fmul float %y, 3.0
-  store float %z, float addrspace(1)* %out
+  store float %z, ptr addrspace(1) %out
   ret void
 }
 
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
 ; GCN-NOT: v_mul_f32
 ; GCN-NOT: v_mad_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @test_mul_2_k_inv(ptr addrspace(1) %out, float %x) #0 {
   %y = fmul float %x, 3.0
   %z = fmul float %y, 2.0
-  store float %z, float addrspace(1)* %out
+  store float %z, ptr addrspace(1) %out
   ret void
 }
 
@@ -75,12 +75,12 @@ define amdgpu_kernel void @test_mul_2_k_inv(float addrspace(1)* %out, float %x)
 ; GCN: v_mul_f32
 ; GCN: v_mul_f32
 ; GCN-NOT: v_mul_f32
-define amdgpu_kernel void @test_mul_twouse(float addrspace(1)* %out, float %x, float %y) #0 {
+define amdgpu_kernel void @test_mul_twouse(ptr addrspace(1) %out, float %x, float %y) #0 {
   %a = fmul float %x, 5.0
   %b = fsub float -0.0, %a
   %c = fmul float %b, %y
   %d = fmul float %c, %a
-  store float %d, float addrspace(1)* %out
+  store float %d, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul64.ll b/llvm/test/CodeGen/AMDGPU/fmul64.ll
index d37d432842f37..a96f108f336f8 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul64.ll
@@ -3,24 +3,24 @@
 
 ; FUNC-LABEL: {{^}}fmul_f64:
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                      double addrspace(1)* %in2) {
-   %r0 = load double, double addrspace(1)* %in1
-   %r1 = load double, double addrspace(1)* %in2
+define amdgpu_kernel void @fmul_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                      ptr addrspace(1) %in2) {
+   %r0 = load double, ptr addrspace(1) %in1
+   %r1 = load double, ptr addrspace(1) %in2
    %r2 = fmul double %r0, %r1
-   store double %r2, double addrspace(1)* %out
+   store double %r2, ptr addrspace(1) %out
    ret void
 }
 
 ; FUNC-LABEL: {{^}}fmul_v2f64:
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
-                        <2 x double> addrspace(1)* %in2) {
-   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+define amdgpu_kernel void @fmul_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                        ptr addrspace(1) %in2) {
+   %r0 = load <2 x double>, ptr addrspace(1) %in1
+   %r1 = load <2 x double>, ptr addrspace(1) %in2
    %r2 = fmul <2 x double> %r0, %r1
-   store <2 x double> %r2, <2 x double> addrspace(1)* %out
+   store <2 x double> %r2, ptr addrspace(1) %out
    ret void
 }
 
@@ -29,11 +29,11 @@ define amdgpu_kernel void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
-                        <4 x double> addrspace(1)* %in2) {
-   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
+define amdgpu_kernel void @fmul_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                        ptr addrspace(1) %in2) {
+   %r0 = load <4 x double>, ptr addrspace(1) %in1
+   %r1 = load <4 x double>, ptr addrspace(1) %in2
    %r2 = fmul <4 x double> %r0, %r1
-   store <4 x double> %r2, <4 x double> addrspace(1)* %out
+   store <4 x double> %r2, ptr addrspace(1) %out
    ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
index f2a13f6fc22a1..198c0cac5cb47 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -27,13 +27,13 @@ declare half @llvm.fabs.f16(half) #1
 ; GFX10-FLUSH:  v_add_f16_e32
 ; GFX10-DENORM: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
-define amdgpu_kernel void @fmuladd_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
-                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
-  %r0 = load half, half addrspace(1)* %in1
-  %r1 = load half, half addrspace(1)* %in2
-  %r2 = load half, half addrspace(1)* %in3
+define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load half, ptr addrspace(1) %in1
+  %r1 = load half, ptr addrspace(1) %in2
+  %r2 = load half, ptr addrspace(1) %in3
   %r3 = tail call half @llvm.fmuladd.f16(half %r0, half %r1, half %r2)
-  store half %r3, half addrspace(1)* %out
+  store half %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -46,14 +46,14 @@ define amdgpu_kernel void @fmuladd_f16(half addrspace(1)* %out, half addrspace(1
 ; GFX10-FLUSH:  v_add_f16_e32
 ; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
-define amdgpu_kernel void @fmul_fadd_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
-                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
-  %r0 = load half, half addrspace(1)* %in1
-  %r1 = load half, half addrspace(1)* %in2
-  %r2 = load half, half addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load half, ptr addrspace(1) %in1
+  %r1 = load half, ptr addrspace(1) %in2
+  %r2 = load half, ptr addrspace(1) %in3
   %mul = fmul half %r0, %r1
   %add = fadd half %mul, %r2
-  store half %add, half addrspace(1)* %out
+  store half %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -66,14 +66,14 @@ define amdgpu_kernel void @fmul_fadd_f16(half addrspace(1)* %out, half addrspace
 ; GFX10-FLUSH:  v_add_f16_e32
 ; GFX10-DENORM: v_fmac_f16_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
-define amdgpu_kernel void @fmul_fadd_contract_f16(half addrspace(1)* %out, half addrspace(1)* %in1,
-                         half addrspace(1)* %in2, half addrspace(1)* %in3) #0 {
-  %r0 = load half, half addrspace(1)* %in1
-  %r1 = load half, half addrspace(1)* %in2
-  %r2 = load half, half addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_contract_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load half, ptr addrspace(1) %in1
+  %r1 = load half, ptr addrspace(1) %in2
+  %r2 = load half, ptr addrspace(1) %in3
   %mul = fmul contract half %r0, %r1
   %add = fadd contract half %mul, %r2
-  store half %add, half addrspace(1)* %out
+  store half %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -93,17 +93,17 @@ define amdgpu_kernel void @fmul_fadd_contract_f16(half addrspace(1)* %out, half
 ; GFX10-DENORM: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
 ; GFX10-FLUSH:  global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
 
-define amdgpu_kernel void @fmuladd_2.0_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -123,17 +123,17 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f16(half addrspace(1)* %out, half add
 ; GFX10-DENORM: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
 ; GFX10-FLUSH:  global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 
-define amdgpu_kernel void @fmuladd_a_2.0_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r3 = tail call half @llvm.fmuladd.f16(half %r1, half 2.0, half %r2)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -157,20 +157,20 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f16(half addrspace(1)* %out, half add
 ; GFX10-DENORM-STRICT:   global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-CONTRACT: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
 
-define amdgpu_kernel void @fadd_a_a_b_f16(half addrspace(1)* %out,
-                            half addrspace(1)* %in1,
-                            half addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f16(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile half, half addrspace(1)* %gep.0
-  %r1 = load volatile half, half addrspace(1)* %gep.1
+  %r0 = load volatile half, ptr addrspace(1) %gep.0
+  %r1 = load volatile half, ptr addrspace(1) %gep.1
 
   %add.0 = fadd half %r0, %r0
   %add.1 = fadd half %add.0, %r1
-  store half %add.1, half addrspace(1)* %gep.out
+  store half %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -194,20 +194,20 @@ define amdgpu_kernel void @fadd_a_a_b_f16(half addrspace(1)* %out,
 ; GFX10-DENORM-STRICT:   global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-CONTRACT: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
 
-define amdgpu_kernel void @fadd_b_a_a_f16(half addrspace(1)* %out,
-                            half addrspace(1)* %in1,
-                            half addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_b_a_a_f16(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile half, half addrspace(1)* %gep.0
-  %r1 = load volatile half, half addrspace(1)* %gep.1
+  %r0 = load volatile half, ptr addrspace(1) %gep.0
+  %r1 = load volatile half, ptr addrspace(1) %gep.1
 
   %add.0 = fadd half %r0, %r0
   %add.1 = fadd half %r1, %add.0
-  store half %add.1, half addrspace(1)* %gep.out
+  store half %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -223,17 +223,17 @@ define amdgpu_kernel void @fadd_b_a_a_f16(half addrspace(1)* %out,
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[R2]], [[MUL2]]
 ; GFX10-FLUSH:  global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
-define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1, half %r2)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -252,19 +252,19 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f16(half addrspace(1)* %out, half
 
 ; GFX10-DENORM: v_fmac_f16_e32 [[R2]], 2.0, [[R1]]
 ; GFX10-DENORM: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
-define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg half %r1
 
   %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -283,19 +283,19 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out,
 
 ; GFX10-DENORM: v_fmac_f16_e32 [[R2]], -2.0, [[R1]]
 ; GFX10-DENORM: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
-define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg half %r1
 
   %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -308,19 +308,19 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half
 ; GFX10-FLUSH: v_add_f16_e32 [[MUL2:v[0-9]+]], [[R1]], [[R1]]
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[MUL2]], [[R2]]
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %r2.fneg = fneg half %r2
 
   %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg)
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -341,21 +341,21 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half
 ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %mul = fmul half %a, %b
   %sub = fsub half %mul, %c
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -375,21 +375,21 @@ define amdgpu_kernel void @mad_sub_f16(half addrspace(1)* noalias nocapture %out
 ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
 ; GFX10: global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %mul = fmul half %a, %b
   %sub = fsub half %c, %mul
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -409,22 +409,22 @@ define amdgpu_kernel void @mad_sub_inv_f16(half addrspace(1)* noalias nocapture
 ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
 ; GFX10-FLUSH: v_sub_f16_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_fabs_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %c.abs = call half @llvm.fabs.f16(half %c) #0
   %mul = fmul half %a, %b
   %sub = fsub half %mul, %c.abs
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -445,22 +445,22 @@ define amdgpu_kernel void @mad_sub_fabs_f16(half addrspace(1)* noalias nocapture
 ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
 ; GFX10-FLUSH: v_sub_f16_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_inv_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_fabs_inv_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %c.abs = call half @llvm.fabs.f16(half %c) #0
   %mul = fmul half %a, %b
   %sub = fsub half %c.abs, %mul
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -484,23 +484,23 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f16(half addrspace(1)* noalias nocap
 ; GFX10-FLUSH:  global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-STRICT: global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-CONTRACT: global_store_{{short|b16}} v{{[0-9]+}}, [[REGC]]
-define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @neg_neg_mad_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %nega = fneg half %a
   %negb = fneg half %b
   %mul = fmul half %nega, %negb
   %sub = fadd half %mul, %c
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -521,22 +521,22 @@ define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture
 ; GFX10-FLUSH: v_mul_f16_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @mad_fabs_sub_f16(half addrspace(1)* noalias nocapture %out, half addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_fabs_sub_f16(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr half, half addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr half, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr half, half addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr half, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr half, half addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %gep0, align 2
-  %b = load volatile half, half addrspace(1)* %gep1, align 2
-  %c = load volatile half, half addrspace(1)* %gep2, align 2
+  %gep2 = getelementptr half, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %gep0, align 2
+  %b = load volatile half, ptr addrspace(1) %gep1, align 2
+  %c = load volatile half, ptr addrspace(1) %gep2, align 2
   %b.abs = call half @llvm.fabs.f16(half %b) #0
   %mul = fmul half %a, %b.abs
   %sub = fsub half %mul, %c
-  store half %sub, half addrspace(1)* %outgep, align 2
+  store half %sub, ptr addrspace(1) %outgep, align 2
   ret void
 }
 
@@ -559,19 +559,19 @@ define amdgpu_kernel void @mad_fabs_sub_f16(half addrspace(1)* noalias nocapture
 ; GFX10-FLUSH:  global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-STRICT:   global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
 ; GFX10-DENORM-CONTRACT: global_store_{{short|b16}} v{{[0-9]+}}, [[R2]]
-define amdgpu_kernel void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+define amdgpu_kernel void @fsub_c_fadd_a_a_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %add = fadd half %r1, %r1
   %r3 = fsub half %r2, %add
 
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -591,19 +591,19 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f16(half addrspace(1)* %out, half add
 ; GFX10-FLUSH: v_add_f16_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
 ; GFX10-FLUSH: v_sub_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
 ; GFX10:       global_store_{{short|b16}} v{{[0-9]+}}, [[RESULT]]
-define amdgpu_kernel void @fsub_fadd_a_a_c_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+define amdgpu_kernel void @fsub_fadd_a_a_c_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr half, half addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr half, half addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr half, half addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr half, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr half, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr half, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile half, half addrspace(1)* %gep.0
-  %r2 = load volatile half, half addrspace(1)* %gep.1
+  %r1 = load volatile half, ptr addrspace(1) %gep.0
+  %r2 = load volatile half, ptr addrspace(1) %gep.1
 
   %add = fadd half %r1, %r1
   %r3 = fsub half %add, %r2
 
-  store half %r3, half addrspace(1)* %gep.out
+  store half %r3, ptr addrspace(1) %gep.out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index 824b533d35e86..945973b277289 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -38,13 +38,13 @@ declare float @llvm.fabs.f32(float) #1
 
 ; GCN-DENORM-SLOWFMA: v_mul_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 ; GCN-DENORM-SLOWFMA: v_add_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
-                         float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
-  %r0 = load float, float addrspace(1)* %in1
-  %r1 = load float, float addrspace(1)* %in2
-  %r2 = load float, float addrspace(1)* %in3
+define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load float, ptr addrspace(1) %in1
+  %r1 = load float, ptr addrspace(1) %in2
+  %r2 = load float, ptr addrspace(1) %in3
   %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
-  store float %r3, float addrspace(1)* %out
+  store float %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -58,14 +58,14 @@ define amdgpu_kernel void @fmuladd_f32(float addrspace(1)* %out, float addrspace
 
 ; GCN-DENORM-STRICT: v_mul_f32_e32
 ; GCN-DENORM-STRICT: v_add_f32_e32
-define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
-                           float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
-  %r0 = load volatile float, float addrspace(1)* %in1
-  %r1 = load volatile float, float addrspace(1)* %in2
-  %r2 = load volatile float, float addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                           ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load volatile float, ptr addrspace(1) %in1
+  %r1 = load volatile float, ptr addrspace(1) %in2
+  %r2 = load volatile float, ptr addrspace(1) %in3
   %mul = fmul float %r0, %r1
   %add = fadd float %mul, %r2
-  store float %add, float addrspace(1)* %out
+  store float %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -76,14 +76,14 @@ define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspa
 ; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
 
 ; GCN-DENORM-FASTFMA: v_fma_f32
-define amdgpu_kernel void @fmul_fadd_contract_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
-                           float addrspace(1)* %in2, float addrspace(1)* %in3) #0 {
-  %r0 = load volatile float, float addrspace(1)* %in1
-  %r1 = load volatile float, float addrspace(1)* %in2
-  %r2 = load volatile float, float addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                           ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load volatile float, ptr addrspace(1) %in1
+  %r1 = load volatile float, ptr addrspace(1) %in2
+  %r2 = load volatile float, ptr addrspace(1) %in3
   %mul = fmul contract float %r0, %r1
   %add = fadd contract float %mul, %r2
-  store float %add, float addrspace(1)* %out
+  store float %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -103,17 +103,17 @@ define amdgpu_kernel void @fmul_fadd_contract_f32(float addrspace(1)* %out, floa
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -134,17 +134,17 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float a
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -167,20 +167,20 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float a
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
-                            float addrspace(1)* %in1,
-                            float addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile float, float addrspace(1)* %gep.0
-  %r1 = load volatile float, float addrspace(1)* %gep.1
+  %r0 = load volatile float, ptr addrspace(1) %gep.0
+  %r1 = load volatile float, ptr addrspace(1) %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %add.0, %r1
-  store float %add.1, float addrspace(1)* %gep.out
+  store float %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -203,20 +203,20 @@ define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
-                            float addrspace(1)* %in1,
-                            float addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile float, float addrspace(1)* %gep.0
-  %r1 = load volatile float, float addrspace(1)* %gep.1
+  %r0 = load volatile float, ptr addrspace(1) %gep.0
+  %r1 = load volatile float, ptr addrspace(1) %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %r1, %add.0
-  store float %add.1, float addrspace(1)* %gep.out
+  store float %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -233,17 +233,17 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -265,19 +265,19 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, flo
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -298,19 +298,19 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -330,19 +330,19 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %r2.fneg = fneg float %r2
 
   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -362,21 +362,21 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -397,21 +397,21 @@ define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %ou
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %c, %mul
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -431,22 +431,22 @@ define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c.abs
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -467,22 +467,22 @@ define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocaptur
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %c.abs, %mul
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -505,23 +505,23 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias noca
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %nega = fneg float %a
   %negb = fneg float %b
   %mul = fmul float %nega, %negb
   %sub = fadd float %mul, %c
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -541,22 +541,22 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
+define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr float, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %gep0, align 4
-  %b = load volatile float, float addrspace(1)* %gep1, align 4
-  %c = load volatile float, float addrspace(1)* %gep2, align 4
+  %gep2 = getelementptr float, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %gep0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep1, align 4
+  %c = load volatile float, ptr addrspace(1) %gep2, align 4
   %b.abs = call float @llvm.fabs.f32(float %b) #0
   %mul = fmul float %a, %b.abs
   %sub = fsub float %mul, %c
-  store float %sub, float addrspace(1)* %outgep, align 4
+  store float %sub, ptr addrspace(1) %outgep, align 4
   ret void
 }
 
@@ -577,19 +577,19 @@ define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocaptur
 
 ; SI-DENORM: buffer_store_dword [[RESULT]]
 ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %r2, %add
 
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -608,19 +608,19 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float a
 
 ; SI: buffer_store_dword [[RESULT]]
 ; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fsub_fadd_a_a_c_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile float, float addrspace(1)* %gep.0
-  %r2 = load volatile float, float addrspace(1)* %gep.1
+  %r1 = load volatile float, ptr addrspace(1) %gep.0
+  %r2 = load volatile float, ptr addrspace(1) %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %add, %r2
 
-  store float %r3, float addrspace(1)* %gep.out
+  store float %r3, ptr addrspace(1) %gep.out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
index 63a715e0e9ae2..d8c5bf5c7c8ce 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -7,13 +7,13 @@
 
 ; GCN-LABEL: {{^}}fmuladd_f64:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                         double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
-  %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fmuladd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
+  %r2 = load double, ptr addrspace(1) %in3
   %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
-  store double %r3, double addrspace(1)* %out
+  store double %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -22,28 +22,28 @@ define amdgpu_kernel void @fmuladd_f64(double addrspace(1)* %out, double addrspa
 
 ; GCN-STRICT: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GCN-STRICT: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmul_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                           double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
-  %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                           ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
+  %r2 = load double, ptr addrspace(1) %in3
   %tmp = fmul double %r0, %r1
   %r3 = fadd double %tmp, %r2
-  store double %r3, double addrspace(1)* %out
+  store double %r3, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fmul_fadd_contract_f64:
 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 
-define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                           double addrspace(1)* %in2, double addrspace(1)* %in3) #0 {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
-  %r2 = load double, double addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_contract_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                           ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
+  %r2 = load double, ptr addrspace(1) %in3
   %tmp = fmul contract double %r0, %r1
   %r3 = fadd contract double %tmp, %r2
-  store double %r3, double addrspace(1)* %out
+  store double %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -58,20 +58,20 @@ define amdgpu_kernel void @fmul_fadd_contract_f64(double addrspace(1)* %out, dou
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_a_a_b_f64(double addrspace(1)* %out,
-                            double addrspace(1)* %in1,
-                            double addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f64(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile double, double addrspace(1)* %gep.0
-  %r1 = load volatile double, double addrspace(1)* %gep.1
+  %r0 = load volatile double, ptr addrspace(1) %gep.0
+  %r1 = load volatile double, ptr addrspace(1) %gep.1
 
   %add.0 = fadd double %r0, %r0
   %add.1 = fadd double %add.0, %r1
-  store double %add.1, double addrspace(1)* %gep.out
+  store double %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -86,20 +86,20 @@ define amdgpu_kernel void @fadd_a_a_b_f64(double addrspace(1)* %out,
 
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; VI: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_b_a_a_f64(double addrspace(1)* %out,
-                            double addrspace(1)* %in1,
-                            double addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_b_a_a_f64(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile double, double addrspace(1)* %gep.0
-  %r1 = load volatile double, double addrspace(1)* %gep.1
+  %r0 = load volatile double, ptr addrspace(1) %gep.0
+  %r1 = load volatile double, ptr addrspace(1) %gep.1
 
   %add.0 = fadd double %r0, %r0
   %add.1 = fadd double %r1, %add.0
-  store double %add.1, double addrspace(1)* %gep.out
+  store double %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -108,21 +108,21 @@ define amdgpu_kernel void @fadd_b_a_a_f64(double addrspace(1)* %out,
 ; GCN-STRICT: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
 
 ; GCN-CONTRACT: v_fma_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
+define amdgpu_kernel void @mad_sub_f64(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #1 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tid.ext = sext i32 %tid to i64
-  %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
+  %gep0 = getelementptr double, ptr addrspace(1) %ptr, i64 %tid.ext
   %add1 = add i64 %tid.ext, 1
-  %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
+  %gep1 = getelementptr double, ptr addrspace(1) %ptr, i64 %add1
   %add2 = add i64 %tid.ext, 2
-  %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
-  %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %gep0, align 8
-  %b = load volatile double, double addrspace(1)* %gep1, align 8
-  %c = load volatile double, double addrspace(1)* %gep2, align 8
+  %gep2 = getelementptr double, ptr addrspace(1) %ptr, i64 %add2
+  %outgep = getelementptr double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %gep0, align 8
+  %b = load volatile double, ptr addrspace(1) %gep1, align 8
+  %c = load volatile double, ptr addrspace(1) %gep2, align 8
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
-  store double %sub, double addrspace(1)* %outgep, align 8
+  store double %sub, ptr addrspace(1) %outgep, align 8
   ret void
 }
 
@@ -131,20 +131,20 @@ define amdgpu_kernel void @mad_sub_f64(double addrspace(1)* noalias nocapture %o
 ; GCN-STRICT: v_add_f64
 
 ; GCN-CONTRACT: v_fma_f64
-define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
-                                      double addrspace(1)* %in1,
-                                      double addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(ptr addrspace(1) %out,
+                                      ptr addrspace(1) %in1,
+                                      ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile double, double addrspace(1)* %gep.0
-  %r1 = load volatile double, double addrspace(1)* %gep.1
+  %r0 = load volatile double, ptr addrspace(1) %gep.0
+  %r1 = load volatile double, ptr addrspace(1) %gep.1
 
   %add.0 = fadd fast double %r0, %r0
   %add.1 = fadd double %add.0, %r1
-  store double %add.1, double addrspace(1)* %gep.out
+  store double %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -153,39 +153,39 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
 ; GCN-STRICT: v_add_f64
 
 ; GCN-CONTRACT: v_fma_f64
-define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(double addrspace(1)* %out,
-                                      double addrspace(1)* %in1,
-                                      double addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f64_fast_add1(ptr addrspace(1) %out,
+                                      ptr addrspace(1) %in1,
+                                      ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile double, double addrspace(1)* %gep.0
-  %r1 = load volatile double, double addrspace(1)* %gep.1
+  %r0 = load volatile double, ptr addrspace(1) %gep.0
+  %r1 = load volatile double, ptr addrspace(1) %gep.1
 
   %add.0 = fadd double %r0, %r0
   %add.1 = fadd fast double %add.0, %r1
-  store double %add.1, double addrspace(1)* %gep.out
+  store double %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast:
 ; GCN: v_fma_f64
-define amdgpu_kernel void @fadd_a_a_b_f64_fast(double addrspace(1)* %out,
-                                 double addrspace(1)* %in1,
-                                double addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out,
+                                 ptr addrspace(1) %in1,
+                                ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr double, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr double, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile double, double addrspace(1)* %gep.0
-  %r1 = load volatile double, double addrspace(1)* %gep.1
+  %r0 = load volatile double, ptr addrspace(1) %gep.0
+  %r1 = load volatile double, ptr addrspace(1) %gep.1
 
   %add.0 = fadd fast double %r0, %r0
   %add.1 = fadd fast double %add.0, %r1
-  store double %add.1, double addrspace(1)* %gep.out
+  store double %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index b76d67a48a065..a6281c473703a 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -17,13 +17,13 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 ; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 ; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
-                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
-  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
-  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
-  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
+define amdgpu_kernel void @fmuladd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load <2 x half>, ptr addrspace(1) %in1
+  %r1 = load <2 x half>, ptr addrspace(1) %in2
+  %r2 = load <2 x half>, ptr addrspace(1) %in3
   %r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2)
-  store <2 x half> %r3, <2 x half> addrspace(1)* %out
+  store <2 x half> %r3, ptr addrspace(1) %out
   ret void
 }
 
@@ -32,14 +32,14 @@ define amdgpu_kernel void @fmuladd_v2f16(<2 x half> addrspace(1)* %out, <2 x hal
 ; GFX9-DENORM-STRICT: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 ; GFX9-DENORM-CONTRACT: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
-                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
-  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
-  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
-  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load <2 x half>, ptr addrspace(1) %in1
+  %r1 = load <2 x half>, ptr addrspace(1) %in2
+  %r2 = load <2 x half>, ptr addrspace(1) %in3
   %r3 = fmul <2 x half> %r0, %r1
   %r4 = fadd <2 x half> %r3, %r2
-  store <2 x half> %r4, <2 x half> addrspace(1)* %out
+  store <2 x half> %r4, ptr addrspace(1) %out
   ret void
 }
 
@@ -48,14 +48,14 @@ define amdgpu_kernel void @fmul_fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x h
 ; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 ; GFX9-DENORM: v_pk_fma_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1,
-                         <2 x half> addrspace(1)* %in2, <2 x half> addrspace(1)* %in3) #0 {
-  %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1
-  %r1 = load <2 x half>, <2 x half> addrspace(1)* %in2
-  %r2 = load <2 x half>, <2 x half> addrspace(1)* %in3
+define amdgpu_kernel void @fmul_fadd_contract_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                         ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+  %r0 = load <2 x half>, ptr addrspace(1) %in1
+  %r1 = load <2 x half>, ptr addrspace(1) %in2
+  %r2 = load <2 x half>, ptr addrspace(1) %in3
   %r3 = fmul contract <2 x half> %r0, %r1
   %r4 = fadd contract <2 x half> %r3, %r2
-  store <2 x half> %r4, <2 x half> addrspace(1)* %out
+  store <2 x half> %r4, ptr addrspace(1) %out
   ret void
 }
 
@@ -70,17 +70,17 @@ define amdgpu_kernel void @fmul_fadd_contract_v2f16(<2 x half> addrspace(1)* %ou
 
 ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
 ; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
-  %r2 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
+  %r1 = load volatile <2 x half>, ptr addrspace(1) %gep.0
+  %r2 = load volatile <2 x half>, ptr addrspace(1) %gep.1
 
   %r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> <half 2.0, half 2.0>, <2 x half> %r1, <2 x half> %r2)
-  store <2 x half> %r3, <2 x half> addrspace(1)* %gep.out
+  store <2 x half> %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -94,17 +94,17 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out,
 
 ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
 ; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[RESULT]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 
-  %r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
-  %r2 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
+  %r1 = load volatile <2 x half>, ptr addrspace(1) %gep.0
+  %r2 = load volatile <2 x half>, ptr addrspace(1) %gep.1
 
   %r3 = tail call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %r1, <2 x half> <half 2.0, half 2.0>, <2 x half> %r2)
-  store <2 x half> %r3, <2 x half> addrspace(1)* %gep.out
+  store <2 x half> %r3, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -120,20 +120,20 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out,
 ; GFX9-DENORM-CONTRACT: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
 
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
-define amdgpu_kernel void @fadd_a_a_b_v2f16(<2 x half> addrspace(1)* %out,
-                            <2 x half> addrspace(1)* %in1,
-                            <2 x half> addrspace(1)* %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_v2f16(ptr addrspace(1) %out,
+                            ptr addrspace(1) %in1,
+                            ptr addrspace(1) %in2) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
-  %gep.1 = getelementptr <2 x half>, <2 x half> addrspace(1)* %gep.0, i32 1
-  %gep.out = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
+  %gep.1 = getelementptr <2 x half>, ptr addrspace(1) %gep.0, i32 1
+  %gep.out = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 
-  %r0 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.0
-  %r1 = load volatile <2 x half>, <2 x half> addrspace(1)* %gep.1
+  %r0 = load volatile <2 x half>, ptr addrspace(1) %gep.0
+  %r1 = load volatile <2 x half>, ptr addrspace(1) %gep.1
 
   %add.0 = fadd <2 x half> %r0, %r0
   %add.1 = fadd <2 x half> %add.0, %r1
-  store <2 x half> %add.1, <2 x half> addrspace(1)* %gep.out
+  store <2 x half> %add.1, ptr addrspace(1) %gep.out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
index 24dc277c8e734..beff3dd275713 100644
--- a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
@@ -11,7 +11,7 @@ declare double @llvm.nearbyint.f64(double) #0
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0
 
-define amdgpu_kernel void @fnearbyint_f16(half addrspace(1)* %out, half %in) #1 {
+define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) #1 {
 ; SI-LABEL: fnearbyint_f16:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
@@ -49,11 +49,11 @@ define amdgpu_kernel void @fnearbyint_f16(half addrspace(1)* %out, half %in) #1
 ; VI-NEXT:    flat_store_short v[0:1], v2
 ; VI-NEXT:    s_endpgm
   %1 = call half @llvm.nearbyint.f16(half %in)
-  store half %1, half addrspace(1)* %out
+  store half %1, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @fnearbyint_f32(float addrspace(1)* %out, float %in) #1 {
+define amdgpu_kernel void @fnearbyint_f32(ptr addrspace(1) %out, float %in) #1 {
 ; SICI-LABEL: fnearbyint_f32:
 ; SICI:       ; %bb.0: ; %entry
 ; SICI-NEXT:    s_load_dword s4, s[0:1], 0xb
@@ -77,11 +77,11 @@ define amdgpu_kernel void @fnearbyint_f32(float addrspace(1)* %out, float %in) #
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call float @llvm.nearbyint.f32(float %in)
-  store float %0, float addrspace(1)* %out
+  store float %0, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
+define amdgpu_kernel void @fnearbyint_v2f32(ptr addrspace(1) %out, <2 x float> %in) #1 {
 ; SICI-LABEL: fnearbyint_v2f32:
 ; SICI:       ; %bb.0: ; %entry
 ; SICI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -107,11 +107,11 @@ define amdgpu_kernel void @fnearbyint_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %in)
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  store <2 x float> %0, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 {
+define amdgpu_kernel void @fnearbyint_v4f32(ptr addrspace(1) %out, <4 x float> %in) #1 {
 ; SICI-LABEL: fnearbyint_v4f32:
 ; SICI:       ; %bb.0: ; %entry
 ; SICI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
@@ -141,11 +141,11 @@ define amdgpu_kernel void @fnearbyint_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %in)
-  store <4 x float> %0, <4 x float> addrspace(1)* %out
+  store <4 x float> %0, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @nearbyint_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @nearbyint_f64(ptr addrspace(1) %out, double %in) {
 ; SI-LABEL: nearbyint_f64:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -192,10 +192,10 @@ define amdgpu_kernel void @nearbyint_f64(double addrspace(1)* %out, double %in)
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call double @llvm.nearbyint.f64(double %in)
-  store double %0, double addrspace(1)* %out
+  store double %0, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @nearbyint_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
 ; SI-LABEL: nearbyint_v2f64:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -256,11 +256,11 @@ define amdgpu_kernel void @nearbyint_v2f64(<2 x double> addrspace(1)* %out, <2 x
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %in)
-  store <2 x double> %0, <2 x double> addrspace(1)* %out
+  store <2 x double> %0, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @nearbyint_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
 ; SI-LABEL: nearbyint_v4f64:
 ; SI:       ; %bb.0: ; %entry
 ; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
@@ -350,7 +350,7 @@ define amdgpu_kernel void @nearbyint_v4f64(<4 x double> addrspace(1)* %out, <4 x
 ; VI-NEXT:    s_endpgm
 entry:
   %0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %in)
-  store <4 x double> %0, <4 x double> addrspace(1)* %out
+  store <4 x double> %0, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index a8970b94c9c49..f412c737456f3 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -17,17 +17,17 @@
 
 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %add = fadd float %a, %b
   %fneg = fneg float %add
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -40,18 +40,18 @@ define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrsp
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %add = fadd float %a, %b
   %fneg = fneg float %add
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %add, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %add, ptr addrspace(1) %out
   ret void
 }
 
@@ -70,19 +70,19 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %add = fadd float %a, %b
   %fneg = fneg float %add
   %use1 = fmul float %add, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -96,18 +96,18 @@ define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out
 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %add = fadd float %fneg.a, %b
   %fneg = fneg float %add
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,18 +120,18 @@ define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float
 
 ; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.b = fneg float %b
   %add = fadd float %a, %fneg.b
   %fneg = fneg float %add
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -144,19 +144,19 @@ define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float
 
 ; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %fneg.b = fneg float %b
   %add = fadd float %fneg.a, %fneg.b
   %fneg = fneg float %add
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -174,19 +174,19 @@ define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, fl
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %add = fadd float %fneg.a, %b
   %fneg = fneg float %add
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %fneg.a, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %fneg.a, ptr addrspace(1) %out
   ret void
 }
 
@@ -204,20 +204,20 @@ define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %add = fadd float %fneg.a, %b
   %fneg = fneg float %add
   %use1 = fmul float %fneg.a, %c
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -272,17 +272,17 @@ define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -293,18 +293,18 @@ define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrsp
 ; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %mul, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -318,19 +318,19 @@ define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %use1 = fmul float %mul, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -339,18 +339,18 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = fmul float %fneg.a, %b
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -359,18 +359,18 @@ define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.b = fneg float %b
   %mul = fmul float %a, %fneg.b
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -379,19 +379,19 @@ define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %fneg.b = fneg float %b
   %mul = fmul float %fneg.a, %fneg.b
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -403,19 +403,19 @@ define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, fl
 
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = fmul float %fneg.a, %b
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %fneg.a, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %fneg.a, ptr addrspace(1) %out
   ret void
 }
 
@@ -426,20 +426,20 @@ define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %
 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = fmul float %fneg.a, %b
   %fneg = fneg float %mul
   %use1 = fmul float %fneg.a, %c
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -454,17 +454,17 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %
 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -484,15 +484,15 @@ define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_self_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float %a, float %a)
   %min.fneg = fneg float %min
-  store float %min.fneg, float addrspace(1)* %out.gep
+  store float %min.fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -511,15 +511,15 @@ define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float 4.0, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -538,15 +538,15 @@ define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float -4.0, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -566,15 +566,15 @@ define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
 ; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MIN]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call nnan float @llvm.minnum.f32(float 0.0, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -583,15 +583,15 @@ define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float a
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float -0.0, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -606,15 +606,15 @@ define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out,
 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -628,15 +628,15 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, fl
 ; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
   %fneg = fneg float %min
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -652,15 +652,15 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out
 ; VI: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x8000, [[MAX]]
 
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %a.gep
   %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
   %fneg = fsub half -0.000000e+00, %min
-  store half %fneg, half addrspace(1)* %out.gep
+  store half %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -675,15 +675,15 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f16(half addrspace(1)* %out, hal
 ; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], 0.15915494, [[NEG_QUIET]]
 
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %a.gep
   %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
   %fneg = fsub half -0.000000e+00, %min
-  store half %fneg, half addrspace(1)* %out.gep
+  store half %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -699,15 +699,15 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f16(half addrspace(1)* %out,
 ; VI: v_xor_b32_e32 v[[RESULT_HI]], 0x80000000, v[[RESULT_HI]]
 
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RESULT_LO]]:[[RESULT_HI]]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
   %fneg = fsub double -0.000000e+00, %min
-  store double %fneg, double addrspace(1)* %out.gep
+  store double %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -723,15 +723,15 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f64(double addrspace(1)* %out, d
 ; VI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[NEG_QUIET]], 0.15915494
 
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
   %fneg = fsub double -0.000000e+00, %min
-  store double %fneg, double addrspace(1)* %out.gep
+  store double %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -752,18 +752,18 @@ define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[QUIET_A]]
 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %min = call float @llvm.minnum.f32(float 0.0, float %a)
   %fneg = fneg float %min
   %mul = fmul float %fneg, %b
-  store float %mul, float addrspace(1)* %out.gep
+  store float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -781,18 +781,18 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace
 ; VI: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
   %fneg = fneg float %min
   %mul = fmul float %fneg, %b
-  store float %mul, float addrspace(1)* %out.gep
+  store float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -820,19 +820,19 @@ define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %min = call float @llvm.minnum.f32(float %a, float %b)
   %fneg = fneg float %min
   %use1 = fmul float %min, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -863,17 +863,17 @@ define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %
 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_B:v[0-9]+]], -1.0, [[B]]
 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %fneg = fneg float %max
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -893,15 +893,15 @@ define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
 ; GCN-DAG: v_mul_f32_e32 [[NEG_QUIET_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], [[NEG_QUIET_A]], [[NEG_QUIET_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_self_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %max = call float @llvm.maxnum.f32(float %a, float %a)
   %max.fneg = fneg float %max
-  store float %max.fneg, float addrspace(1)* %out.gep
+  store float %max.fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -920,15 +920,15 @@ define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], -4.0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %max = call float @llvm.maxnum.f32(float 4.0, float %a)
   %fneg = fneg float %max
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -947,15 +947,15 @@ define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 4.0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %max = call float @llvm.maxnum.f32(float -4.0, float %a)
   %fneg = fneg float %max
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -975,15 +975,15 @@ define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
 ; GCN: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[MAX]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_maxnum_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a)
   %fneg = fneg float %max
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -992,15 +992,15 @@ define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float a
 ; GCN: v_mul_f32_e32 [[QUIET_NEG_A:v[0-9]+]], -1.0, [[A]]
 ; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_NEG_A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %max = call float @llvm.maxnum.f32(float -0.0, float %a)
   %fneg = fneg float %max
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1021,18 +1021,18 @@ define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[QUIET_A]]
 ; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %max = call float @llvm.maxnum.f32(float 0.0, float %a)
   %fneg = fneg float %max
   %mul = fmul float %fneg, %b
-  store float %mul, float addrspace(1)* %out.gep
+  store float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1060,19 +1060,19 @@ define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %max = call float @llvm.maxnum.f32(float %a, float %b)
   %fneg = fneg float %max
   %use1 = fmul float %max, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -1105,19 +1105,19 @@ define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %
 
 ; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
   %fneg = fneg float %fma
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1131,20 +1131,20 @@ define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrsp
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %fma, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %fma, ptr addrspace(1) %out
   ret void
 }
 
@@ -1164,21 +1164,21 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
   %fneg = fneg float %fma
   %use1 = fmul float %fma, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -1193,20 +1193,20 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
 ; GCN-NSZ-NOT: [[FMA]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.a = fneg float %a
   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1221,20 +1221,20 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, flo
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]]
 ; GCN-NSZ-NOT: [[FMA]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.b = fneg float %b
   %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1249,21 +1249,21 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]
 ; GCN-NSZ-NOT: [[FMA]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.a = fneg float %a
   %fneg.b = fneg float %b
   %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1278,21 +1278,21 @@ define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out,
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
 ; GCN-NSZ-NOT: [[FMA]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.a = fneg float %a
   %fneg.c = fneg float %c
   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1307,20 +1307,20 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out,
 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]]
 ; GCN-NSZ-NOT: [[FMA]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
-define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.c = fneg float %c
   %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1341,21 +1341,21 @@ define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, flo
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]]
 ; GCN-NSZ-NOT: [[NEG_A]]
 ; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.a = fneg float %a
   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
   %fneg = fneg float %fma
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %fneg.a, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %fneg.a, ptr addrspace(1) %out
   ret void
 }
 
@@ -1373,22 +1373,22 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)*
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NSZ-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 {
+define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, float %d) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fneg.a = fneg float %a
   %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
   %fneg = fneg float %fma
   %use1 = fmul float %fneg.a, %d
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -1406,19 +1406,19 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)*
 
 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
 ; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
   %fneg = fneg float %fma
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1428,19 +1428,19 @@ define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrs
 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-NSZ: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
-define amdgpu_kernel void @v_fneg_fmad_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %a.ptr, <4 x float> addrspace(1)* %b.ptr, <4 x float> addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile <4 x float>, <4 x float> addrspace(1)* %a.gep
-  %b = load volatile <4 x float>, <4 x float> addrspace(1)* %b.gep
-  %c = load volatile <4 x float>, <4 x float> addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile <4 x float>, ptr addrspace(1) %a.gep
+  %b = load volatile <4 x float>, ptr addrspace(1) %b.gep
+  %c = load volatile <4 x float>, ptr addrspace(1) %c.gep
   %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
   %fneg = fneg <4 x float> %fma
-  store <4 x float> %fneg, <4 x float> addrspace(1)* %out.gep
+  store <4 x float> %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1460,21 +1460,21 @@ define amdgpu_kernel void @v_fneg_fmad_v4f32(<4 x float> addrspace(1)* %out, <4
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
   %fneg = fneg float %fma
   %use1 = fmul float %fma, 4.0
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -1486,15 +1486,15 @@ define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %o
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fpext = fpext float %a to double
   %fneg = fsub double -0.000000e+00, %fpext
-  store double %fneg, double addrspace(1)* %out.gep
+  store double %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1502,16 +1502,16 @@ define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]]
 ; GCN: {{buffer|flat}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %fpext = fpext float %fneg.a to double
   %fneg = fsub double -0.000000e+00, %fpext
-  store double %fneg, double addrspace(1)* %out.gep
+  store double %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1521,17 +1521,17 @@ define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)*
 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FNEG_A]]
-define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %fpext = fpext float %fneg.a to double
   %fneg = fsub double -0.000000e+00, %fpext
-  store volatile double %fneg, double addrspace(1)* %out.gep
-  store volatile float %fneg.a, float addrspace(1)* undef
+  store volatile double %fneg, ptr addrspace(1) %out.gep
+  store volatile float %fneg.a, ptr addrspace(1) undef
   ret void
 }
 
@@ -1541,16 +1541,16 @@ define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double add
 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[CVT_LO]]:[[CVT_HI]]]
-define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fpext = fpext float %a to double
   %fneg = fsub double -0.000000e+00, %fpext
-  store volatile double %fneg, double addrspace(1)* %out.gep
-  store volatile double %fpext, double addrspace(1)* undef
+  store volatile double %fneg, ptr addrspace(1) %out.gep
+  store volatile double %fpext, ptr addrspace(1) undef
   ret void
 }
 
@@ -1561,47 +1561,47 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double add
 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v[[[CVT_LO]]:[[CVT_HI]]], 4.0
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[FNEG_A]]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fpext = fpext float %a to double
   %fneg = fsub double -0.000000e+00, %fpext
   %mul = fmul double %fpext, 4.0
-  store volatile double %fneg, double addrspace(1)* %out.gep
-  store volatile double %mul, double addrspace(1)* %out.gep
+  store volatile double %fneg, ptr addrspace(1) %out.gep
+  store volatile double %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; FIXME: Source modifiers not folded for f16->f32
 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
-define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %a.gep
   %fpext = fpext half %a to float
   %fneg = fneg float %fpext
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile float %fpext, float addrspace(1)* %out.gep
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile float %fpext, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
-define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile half, half addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds half, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile half, ptr addrspace(1) %a.gep
   %fpext = fpext half %a to float
   %fneg = fneg float %fpext
   %mul = fmul float %fpext, 4.0
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile float %mul, float addrspace(1)* %out.gep
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1613,15 +1613,15 @@ define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(f
 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %fpround = fptrunc double %a to float
   %fneg = fneg float %fpround
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1629,16 +1629,16 @@ define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out,
 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %fneg.a = fsub double -0.000000e+00, %a
   %fpround = fptrunc double %fneg.a to float
   %fneg = fneg float %fpround
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1648,17 +1648,17 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %
 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[A_LO]]:[[NEG_A_HI]]]
-define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %fneg.a = fsub double -0.000000e+00, %a
   %fpround = fptrunc double %fneg.a to float
   %fneg = fneg float %fpround
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile double %fneg.a, double addrspace(1)* undef
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile double %fneg.a, ptr addrspace(1) undef
   ret void
 }
 
@@ -1669,18 +1669,18 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrs
 
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
-define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, double %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %fneg.a = fsub double -0.000000e+00, %a
   %fpround = fptrunc double %fneg.a to float
   %fneg = fneg float %fpround
   %use1 = fmul double %fneg.a, %c
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile double %use1, double addrspace(1)* undef
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile double %use1, ptr addrspace(1) undef
   ret void
 }
 
@@ -1688,15 +1688,15 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrs
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fpround = fptrunc float %a to half
   %fneg = fsub half -0.000000e+00, %fpround
-  store half %fneg, half addrspace(1)* %out.gep
+  store half %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1704,16 +1704,16 @@ define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, f
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %fpround = fptrunc float %fneg.a to half
   %fneg = fsub half -0.000000e+00, %fpround
-  store half %fneg, half addrspace(1)* %out.gep
+  store half %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1723,16 +1723,16 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %o
 ; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[CVT]]
-define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
   %fpround = fptrunc double %a to float
   %fneg = fneg float %fpround
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile float %fpround, float addrspace(1)* %out.gep
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile float %fpround, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1742,17 +1742,17 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrs
 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %fpround = fptrunc float %fneg.a to half
   %fneg = fsub half -0.000000e+00, %fpround
-  store volatile half %fneg, half addrspace(1)* %out.gep
-  store volatile float %fneg.a, float addrspace(1)* undef
+  store volatile half %fneg, ptr addrspace(1) %out.gep
+  store volatile float %fneg.a, ptr addrspace(1) undef
   ret void
 }
 
@@ -1762,18 +1762,18 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrsp
 ; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
 ; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[USE1]]
-define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %fpround = fptrunc float %fneg.a to half
   %fneg = fsub half -0.000000e+00, %fpround
   %use1 = fmul float %fneg.a, %c
-  store volatile half %fneg, half addrspace(1)* %out.gep
-  store volatile float %use1, float addrspace(1)* undef
+  store volatile half %fneg, ptr addrspace(1) %out.gep
+  store volatile float %use1, ptr addrspace(1) undef
   ret void
 }
 
@@ -1785,15 +1785,15 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrsp
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
   %fneg = fneg float %rcp
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1801,16 +1801,16 @@ define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrsp
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
   %fneg = fneg float %rcp
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1820,17 +1820,17 @@ define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float a
 ; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
   %fneg = fneg float %rcp
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile float %fneg.a, float addrspace(1)* undef
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile float %fneg.a, ptr addrspace(1) undef
   ret void
 }
 
@@ -1840,18 +1840,18 @@ define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %ou
 ; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, float %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %fneg.a = fneg float %a
   %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
   %fneg = fneg float %rcp
   %use1 = fmul float %fneg.a, %c
-  store volatile float %fneg, float addrspace(1)* %out.gep
-  store volatile float %use1, float addrspace(1)* undef
+  store volatile float %fneg, ptr addrspace(1) %out.gep
+  store volatile float %use1, ptr addrspace(1) undef
   ret void
 }
 
@@ -1864,17 +1864,17 @@ define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %ou
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   %fneg = fneg float %mul
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -1887,18 +1887,18 @@ define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %mul, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -1911,19 +1911,19 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addr
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
   %fneg = fneg float %mul
   %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -1932,18 +1932,18 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addr
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1952,18 +1952,18 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.b = fneg float %b
   %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1972,19 +1972,19 @@ define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
 ; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
-define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %fneg.b = fneg float %b
   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -1995,19 +1995,19 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %
 ; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_A]]
-define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
   %fneg = fneg float %mul
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %fneg.a, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %fneg.a, ptr addrspace(1) %out
   ret void
 }
 
@@ -2018,20 +2018,20 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspac
 ; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[NEG_MUL_LEGACY]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
+define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, float %c) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %fneg.a = fneg float %a
   %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
   %fneg = fneg float %mul
   %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
-  store volatile float %fneg, float addrspace(1)* %out
-  store volatile float %use1, float addrspace(1)* %out
+  store volatile float %fneg, ptr addrspace(1) %out
+  store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
 
@@ -2045,15 +2045,15 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspac
 ; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
 ; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %sin = call float @llvm.sin.f32(float %a)
   %fneg = fneg float %sin
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2061,15 +2061,15 @@ define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrsp
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %sin = call float @llvm.amdgcn.sin.f32(float %a)
   %fneg = fneg float %sin
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2081,15 +2081,15 @@ define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_trunc_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %trunc = call float @llvm.trunc.f32(float %a)
   %fneg = fneg float %trunc
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2108,15 +2108,15 @@ define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addr
 
 ; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_round_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %round = call float @llvm.round.f32(float %a)
   %fneg = fneg float %round
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2128,15 +2128,15 @@ define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addr
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %rint = call float @llvm.rint.f32(float %a)
   %fneg = fneg float %rint
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2148,15 +2148,15 @@ define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrs
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_nearbyint_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %nearbyint = call float @llvm.nearbyint.f32(float %a)
   %fneg = fneg float %nearbyint
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2168,15 +2168,15 @@ define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_canonicalize_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %trunc = call float @llvm.canonicalize.f32(float %a)
   %fneg = fneg float %trunc
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2190,20 +2190,20 @@ define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, flo
 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
 ; GCN: v_interp_p1_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_interp_p1_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
   %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
-  store volatile float %intrp0, float addrspace(1)* %out.gep
-  store volatile float %intrp1, float addrspace(1)* %out.gep
+  store volatile float %intrp0, ptr addrspace(1) %out.gep
+  store volatile float %intrp1, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2213,20 +2213,20 @@ define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float
 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
 ; GCN: v_interp_p2_f32{{(_e32)?}} v{{[0-9]+}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_fneg_interp_p2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
   %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
-  store volatile float %intrp0, float addrspace(1)* %out.gep
-  store volatile float %intrp1, float addrspace(1)* %out.gep
+  store volatile float %intrp0, ptr addrspace(1) %out.gep
+  store volatile float %intrp1, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2248,16 +2248,16 @@ define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float
 ; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 
-define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   %cmp0 = icmp eq i32 %d, 0
@@ -2265,11 +2265,11 @@ define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float
 
 if:
   %mul1 = fmul float %fneg, %c
-  store volatile float %mul1, float addrspace(1)* %out.gep
+  store volatile float %mul1, ptr addrspace(1) %out.gep
   br label %endif
 
 endif:
-  store volatile float %mul, float addrspace(1)* %out.gep
+  store volatile float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2284,20 +2284,20 @@ endif:
 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
 ; GCN: ; use [[MUL]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_inlineasm_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   call void asm sideeffect "; use $0", "v"(float %fneg) #0
-  store volatile float %fneg, float addrspace(1)* %out.gep
+  store volatile float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2313,20 +2313,20 @@ define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float
 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
 ; GCN: ; use [[NEG]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
-define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
+define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, i32 %d) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
   %mul = fmul float %a, %b
   %fneg = fneg float %mul
   call void asm sideeffect "; use $0", "v"(float %fneg) #0
-  store volatile float %mul, float addrspace(1)* %out.gep
+  store volatile float %mul, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -2348,23 +2348,23 @@ define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
 
   %fneg.a = fneg float %a
   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
   %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
 
-  store volatile float %fma0, float addrspace(1)* %out
-  store volatile float %fma1, float addrspace(1)* %out
+  store volatile float %fma0, ptr addrspace(1) %out
+  store volatile float %fma1, ptr addrspace(1) %out
   ret void
 }
 
@@ -2382,23 +2382,23 @@ define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %o
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
 
   %fneg.a = fneg float %a
   %mul0 = fmul float %fneg.a, %b
   %mul1 = fmul float %fneg.a, %c
 
-  store volatile float %mul0, float addrspace(1)* %out
-  store volatile float %mul1, float addrspace(1)* %out
+  store volatile float %mul0, ptr addrspace(1) %out
+  store volatile float %mul1, ptr addrspace(1) %out
   ret void
 }
 
@@ -2415,23 +2415,23 @@ define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %o
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
+define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
 
   %fneg.a = fneg float %a
   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
   %mul1 = fmul float %fneg.a, %c
 
-  store volatile float %fma0, float addrspace(1)* %out
-  store volatile float %mul1, float addrspace(1)* %out
+  store volatile float %fma0, ptr addrspace(1) %out
+  store volatile float %mul1, ptr addrspace(1) %out
   ret void
 }
 
@@ -2456,26 +2456,26 @@ define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)*
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL2]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
+define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
-  %d = load volatile float, float addrspace(1)* %d.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
+  %d = load volatile float, ptr addrspace(1) %d.gep
 
   %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
   %fneg.fma0 = fneg float %fma0
   %mul1 = fmul float %fneg.fma0, %c
   %mul2 = fmul float %fneg.fma0, %d
 
-  store volatile float %mul1, float addrspace(1)* %out
-  store volatile float %mul2, float addrspace(1)* %out
+  store volatile float %mul1, ptr addrspace(1) %out
+  store volatile float %mul2, ptr addrspace(1) %out
   ret void
 }
 
@@ -2493,26 +2493,26 @@ define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 ; GCN-NEXT: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
+define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
-  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile double, double addrspace(1)* %a.gep
-  %b = load volatile double, double addrspace(1)* %b.gep
-  %c = load volatile double, double addrspace(1)* %c.gep
-  %d = load volatile double, double addrspace(1)* %d.gep
+  %a.gep = getelementptr inbounds double, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds double, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds double, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %d.gep = getelementptr inbounds double, ptr addrspace(1) %d.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile double, ptr addrspace(1) %a.gep
+  %b = load volatile double, ptr addrspace(1) %b.gep
+  %c = load volatile double, ptr addrspace(1) %c.gep
+  %d = load volatile double, ptr addrspace(1) %d.gep
 
   %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
   %fneg.fma0 = fsub double -0.0, %fma0
   %mul1 = fmul double %fneg.fma0, %c
   %mul2 = fmul double %fneg.fma0, %d
 
-  store volatile double %mul1, double addrspace(1)* %out
-  store volatile double %mul2, double addrspace(1)* %out
+  store volatile double %mul1, ptr addrspace(1) %out
+  store volatile double %mul2, ptr addrspace(1) %out
   ret void
 }
 
@@ -2526,23 +2526,23 @@ define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace
 ; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
 ; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
-define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
+define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
-  %d = load volatile float, float addrspace(1)* %d.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
+  %d = load volatile float, ptr addrspace(1) %d.gep
 
   %trunc.a = call float @llvm.trunc.f32(float %a)
   %trunc.fneg.a = fneg float %trunc.a
   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
-  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma0, ptr addrspace(1) %out
   ret void
 }
 
@@ -2556,25 +2556,25 @@ define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)*
 ; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA0]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MUL1]]
-define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
+define amdgpu_kernel void @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr, ptr addrspace(1) %c.ptr, ptr addrspace(1) %d.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
-  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
-  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
-  %b = load volatile float, float addrspace(1)* %b.gep
-  %c = load volatile float, float addrspace(1)* %c.gep
-  %d = load volatile float, float addrspace(1)* %d.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %c.gep = getelementptr inbounds float, ptr addrspace(1) %c.ptr, i64 %tid.ext
+  %d.gep = getelementptr inbounds float, ptr addrspace(1) %d.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
+  %b = load volatile float, ptr addrspace(1) %b.gep
+  %c = load volatile float, ptr addrspace(1) %c.gep
+  %d = load volatile float, ptr addrspace(1) %d.gep
 
   %trunc.a = call float @llvm.trunc.f32(float %a)
   %trunc.fneg.a = fneg float %trunc.a
   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
   %mul1 = fmul float %trunc.a, %d
-  store volatile float %fma0, float addrspace(1)* %out
-  store volatile float %mul1, float addrspace(1)* %out
+  store volatile float %fma0, ptr addrspace(1) %out
+  store volatile float %mul1, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
index 37a062256cb77..c9a31e9207a05 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
@@ -9,15 +9,15 @@
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
 ; GCN: {{buffer|flat}}_store_dword [[RESULT]]
-define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile float, float addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile float, ptr addrspace(1) %a.gep
   %rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
   %fneg = fsub float -0.000000e+00, %rcp
-  store float %fneg, float addrspace(1)* %out.gep
+  store float %fneg, ptr addrspace(1) %out.gep
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 1cbb2c3ce1b99..d5741ae11376f 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -9,11 +9,11 @@
 
 ; GFX89-NOT: _and
 ; GFX89: v_sub_f16_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
+define amdgpu_kernel void @fneg_fabs_fadd_f16(ptr addrspace(1) %out, half %x, half %y) {
   %fabs = call half @llvm.fabs.f16(half %x)
   %fsub = fsub half -0.0, %fabs
   %fadd = fadd half %y, %fsub
-  store half %fadd, half addrspace(1)* %out, align 2
+  store half %fadd, ptr addrspace(1) %out, align 2
   ret void
 }
 
@@ -27,11 +27,11 @@ define amdgpu_kernel void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x,
 ; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], {{s[0-9]+}}, -|{{v[0-9]+}}|
 ; GFX89-NOT: [[MUL]]
 ; GFX89: {{flat|global}}_store_short v{{.+}}, [[MUL]]
-define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
+define amdgpu_kernel void @fneg_fabs_fmul_f16(ptr addrspace(1) %out, half %x, half %y) {
   %fabs = call half @llvm.fabs.f16(half %x)
   %fsub = fsub half -0.0, %fabs
   %fmul = fmul half %y, %fsub
-  store half %fmul, half addrspace(1)* %out, align 2
+  store half %fmul, ptr addrspace(1) %out, align 2
   ret void
 }
 
@@ -41,30 +41,30 @@ define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x,
 
 ; GCN-LABEL: {{^}}fneg_fabs_free_f16:
 ; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
-define amdgpu_kernel void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
+define amdgpu_kernel void @fneg_fabs_free_f16(ptr addrspace(1) %out, i16 %in) {
   %bc = bitcast i16 %in to half
   %fabs = call half @llvm.fabs.f16(half %bc)
   %fsub = fsub half -0.0, %fabs
-  store half %fsub, half addrspace(1)* %out
+  store half %fsub, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_f16:
 ; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
-define amdgpu_kernel void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
+define amdgpu_kernel void @fneg_fabs_f16(ptr addrspace(1) %out, half %in) {
   %fabs = call half @llvm.fabs.f16(half %in)
   %fsub = fsub half -0.0, %fabs
-  store half %fsub, half addrspace(1)* %out, align 2
+  store half %fsub, ptr addrspace(1) %out, align 2
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_fneg_fabs_f16:
 ; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
-define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
-  %val = load half, half addrspace(1)* %in, align 2
+define amdgpu_kernel void @v_fneg_fabs_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %val = load half, ptr addrspace(1) %in, align 2
   %fabs = call half @llvm.fabs.f16(half %val)
   %fsub = fsub half -0.0, %fabs
-  store half %fsub, half addrspace(1)* %out, align 2
+  store half %fsub, ptr addrspace(1) %out, align 2
   ret void
 }
 
@@ -75,11 +75,11 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
 ; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
 
 ; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(ptr addrspace(1) %out, <2 x half> %in) {
   %add = fadd <2 x half> %in, <half 1.0, half 2.0>
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
-  store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+  store <2 x half> %fneg.fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -89,10 +89,10 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
 ; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
-define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(ptr addrspace(1) %out, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
-  store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+  store <2 x half> %fneg.fabs, ptr addrspace(1) %out
   ret void
 }
 
@@ -100,10 +100,10 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %ou
 ; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GCN: {{flat|global}}_store_dwordx2
-define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
+define amdgpu_kernel void @fneg_fabs_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
   %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
   %fsub = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, %fabs
-  store <4 x half> %fsub, <4 x half> addrspace(1)* %out
+  store <4 x half> %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,11 +120,11 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h
 
 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
-define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
   %mul = fmul <2 x half> %fneg.fabs, <half 4.0, half 4.0>
-  store <2 x half> %mul, <2 x half> addrspace(1)* %out
+  store <2 x half> %mul, ptr addrspace(1) %out
   ret void
 }
 
@@ -135,23 +135,23 @@ define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %o
 ; GFX9-DAG: v_mov_b32_e32 [[V_NEG:v[0-9]+]], [[NEG]]
 ; GFX9-DAG: global_store_dword v{{[0-9]+}}, [[V_ABS]], s{{\[[0-9]+:[0-9]+\]}}
 ; GFX9: global_store_dword v{{[0-9]+}}, [[V_NEG]], s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
-  store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
-  store <2 x half> %fneg, <2 x half> addrspace(1)* %out1
+  store <2 x half> %fabs, ptr addrspace(1) %out0
+  store <2 x half> %fneg, ptr addrspace(1) %out1
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:
 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
-define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
   %mul = fmul <2 x half> %fneg, <half 4.0, half 4.0>
-  store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
-  store <2 x half> %mul, <2 x half> addrspace(1)* %out1
+  store <2 x half> %fabs, ptr addrspace(1) %out0
+  store <2 x half> %mul, ptr addrspace(1) %out1
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
index 14525dba1b1c3..5f1d232daabe5 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -6,51 +6,51 @@
 
 ; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
 ; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
+define amdgpu_kernel void @fneg_fabs_fadd_f64(ptr addrspace(1) %out, double %x, double %y) {
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
   %fadd = fadd double %y, %fsub
-  store double %fadd, double addrspace(1)* %out, align 8
+  store double %fadd, ptr addrspace(1) %out, align 8
   ret void
 }
 
-define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
-  %x = load double, double addrspace(1)* %xptr, align 8
-  %y = load double, double addrspace(1)* %xptr, align 8
+define amdgpu_kernel void @v_fneg_fabs_fadd_f64(ptr addrspace(1) %out, ptr addrspace(1) %xptr, ptr addrspace(1) %yptr) {
+  %x = load double, ptr addrspace(1) %xptr, align 8
+  %y = load double, ptr addrspace(1) %xptr, align 8
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
   %fadd = fadd double %y, %fsub
-  store double %fadd, double addrspace(1)* %out, align 8
+  store double %fadd, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
 ; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
-define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
+define amdgpu_kernel void @fneg_fabs_fmul_f64(ptr addrspace(1) %out, double %x, double %y) {
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
   %fmul = fmul double %y, %fsub
-  store double %fmul, double addrspace(1)* %out, align 8
+  store double %fmul, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_free_f64:
-define amdgpu_kernel void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @fneg_fabs_free_f64(ptr addrspace(1) %out, i64 %in) {
   %bc = bitcast i64 %in to double
   %fabs = call double @llvm.fabs.f64(double %bc)
   %fsub = fsub double -0.000000e+00, %fabs
-  store double %fsub, double addrspace(1)* %out
+  store double %fsub, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_fn_free_f64:
 ; SI: s_bitset1_b32
 ; VI:  s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @fneg_fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) {
   %bc = bitcast i64 %in to double
   %fabs = call double @fabs(double %bc)
   %fsub = fsub double -0.000000e+00, %fabs
-  store double %fsub, double addrspace(1)* %out
+  store double %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -61,10 +61,10 @@ define amdgpu_kernel void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64
 ; GCN-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
 ; GCN-DAG: v_mov_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]]
 ; GCN: buffer_store_dwordx2 v[[[LO_V]]:[[HI_V]]]
-define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], double %in) {
+define amdgpu_kernel void @fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %in) {
   %fabs = call double @llvm.fabs.f64(double %in)
   %fsub = fsub double -0.000000e+00, %fabs
-  store double %fsub, double addrspace(1)* %out, align 8
+  store double %fsub, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -72,10 +72,10 @@ define amdgpu_kernel void @fneg_fabs_f64(double addrspace(1)* %out, [8 x i32], d
 ; GCN-NOT: 0x80000000
 ; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
 ; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+define amdgpu_kernel void @fneg_fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
   %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
   %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
-  store <2 x double> %fsub, <2 x double> addrspace(1)* %out
+  store <2 x double> %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -85,10 +85,10 @@ define amdgpu_kernel void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x
 ; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
 ; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
 ; GCN: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+define amdgpu_kernel void @fneg_fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
   %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
   %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
-  store <4 x double> %fsub, <4 x double> addrspace(1)* %out
+  store <4 x double> %fsub, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index c4133d87795ec..e2e1effc9b406 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -5,11 +5,11 @@
 ; FUNC-LABEL: {{^}}fneg_fabsf_fadd_f32:
 ; SI-NOT: and
 ; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @fneg_fabsf_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
+define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
   %fadd = fadd float %y, %fsub
-  store float %fadd, float addrspace(1)* %out, align 4
+  store float %fadd, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -17,11 +17,11 @@ define amdgpu_kernel void @fneg_fabsf_fadd_f32(float addrspace(1)* %out, float %
 ; SI-NOT: and
 ; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
 ; SI-NOT: and
-define amdgpu_kernel void @fneg_fabsf_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
+define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
   %fmul = fmul float %y, %fsub
-  store float %fmul, float addrspace(1)* %out, align 4
+  store float %fmul, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -36,11 +36,11 @@ define amdgpu_kernel void @fneg_fabsf_fmul_f32(float addrspace(1)* %out, float %
 
 ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
 ; VI: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @fneg_fabsf_free_f32(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fabs = call float @llvm.fabs.f32(float %bc)
   %fsub = fsub float -0.000000e+00, %fabs
-  store float %fsub, float addrspace(1)* %out
+  store float %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -50,30 +50,30 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(float addrspace(1)* %out, i32 %in
 ; R600: -PV
 
 ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-define amdgpu_kernel void @fneg_fabsf_fn_free_f32(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fabs = call float @fabsf(float %bc)
   %fsub = fsub float -0.000000e+00, %fabs
-  store float %fsub, float addrspace(1)* %out
+  store float %fsub, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fneg_fabsf_f32:
 ; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-define amdgpu_kernel void @fneg_fabsf_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fneg_fabsf_f32(ptr addrspace(1) %out, float %in) {
   %fabs = call float @llvm.fabs.f32(float %in)
   %fsub = fsub float -0.000000e+00, %fabs
-  store float %fsub, float addrspace(1)* %out, align 4
+  store float %fsub, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_fneg_fabsf_f32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
-define amdgpu_kernel void @v_fneg_fabsf_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %val = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @v_fneg_fabsf_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %val = load float, ptr addrspace(1) %in, align 4
   %fabs = call float @llvm.fabs.f32(float %val)
   %fsub = fsub float -0.000000e+00, %fabs
-  store float %fsub, float addrspace(1)* %out, align 4
+  store float %fsub, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -86,10 +86,10 @@ define amdgpu_kernel void @v_fneg_fabsf_f32(float addrspace(1)* %out, float addr
 ; FIXME: In this case two uses of the constant should be folded
 ; SI: s_bitset1_b32 s{{[0-9]+}}, 31
 ; SI: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @fneg_fabsf_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+define amdgpu_kernel void @fneg_fabsf_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
-  store <2 x float> %fsub, <2 x float> addrspace(1)* %out
+  store <2 x float> %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -98,10 +98,10 @@ define amdgpu_kernel void @fneg_fabsf_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; SI: s_bitset1_b32 s{{[0-9]+}}, 31
 ; SI: s_bitset1_b32 s{{[0-9]+}}, 31
 ; SI: s_bitset1_b32 s{{[0-9]+}}, 31
-define amdgpu_kernel void @fneg_fabsf_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
-  store <4 x float> %fsub, <4 x float> addrspace(1)* %out
+  store <4 x float> %fsub, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
index cca53985196ab..6fdabb80c4620 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -4,9 +4,9 @@
 
 ; FIXME: Should be able to do scalar op
 ; GCN-LABEL: {{^}}s_fneg_f16:
-define amdgpu_kernel void @s_fneg_f16(half addrspace(1)* %out, half %in) #0 {
+define amdgpu_kernel void @s_fneg_f16(ptr addrspace(1) %out, half %in) #0 {
   %fneg = fsub half -0.0, %in
-  store half %fneg, half addrspace(1)* %out
+  store half %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -18,13 +18,13 @@ define amdgpu_kernel void @s_fneg_f16(half addrspace(1)* %out, half %in) #0 {
 ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
 ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
 ; SI: buffer_store_short [[XOR]]
-define amdgpu_kernel void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_fneg_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.in = getelementptr inbounds half, half addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr inbounds half, half addrspace(1)* %in, i32 %tid
-  %val = load half, half addrspace(1)* %gep.in, align 2
+  %gep.in = getelementptr inbounds half, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr inbounds half, ptr addrspace(1) %in, i32 %tid
+  %val = load half, ptr addrspace(1) %gep.in, align 2
   %fneg = fsub half -0.0, %val
-  store half %fneg, half addrspace(1)* %gep.out
+  store half %fneg, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -33,10 +33,10 @@ define amdgpu_kernel void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)
 ; GCN: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
 ; GCN: v_mov_b32_e32 [[V_XOR:v[0-9]+]], [[XOR]]
 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V_XOR]]
-define amdgpu_kernel void @s_fneg_free_f16(half addrspace(1)* %out, i16 %in) #0 {
+define amdgpu_kernel void @s_fneg_free_f16(ptr addrspace(1) %out, i16 %in) #0 {
   %bc = bitcast i16 %in to half
   %fsub = fsub half -0.0, %bc
-  store half %fsub, half addrspace(1)* %out
+  store half %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -51,52 +51,52 @@ define amdgpu_kernel void @s_fneg_free_f16(half addrspace(1)* %out, i16 %in) #0
 
 ; VI-NOT: [[NEG_VALUE]]
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
-define amdgpu_kernel void @v_fneg_fold_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
-  %val = load half, half addrspace(1)* %in
+define amdgpu_kernel void @v_fneg_fold_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %val = load half, ptr addrspace(1) %in
   %fsub = fsub half -0.0, %val
   %fmul = fmul half %fsub, %val
-  store half %fmul, half addrspace(1)* %out
+  store half %fmul, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_fneg_v2f16:
 ; GCN: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
-define amdgpu_kernel void @s_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @s_fneg_v2f16(ptr addrspace(1) %out, <2 x half> %in) #0 {
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %in
-  store <2 x half> %fneg, <2 x half> addrspace(1)* %out
+  store <2 x half> %fneg, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_fneg_v2f16_nonload:
 ; GCN: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
-define amdgpu_kernel void @s_fneg_v2f16_nonload(<2 x half> addrspace(1)* %out) #0 {
+define amdgpu_kernel void @s_fneg_v2f16_nonload(ptr addrspace(1) %out) #0 {
   %in = call i32 asm sideeffect "; def $0", "=s"()
   %in.bc = bitcast i32 %in to <2 x half>
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %in.bc
-  store <2 x half> %fneg, <2 x half> addrspace(1)* %out
+  store <2 x half> %fneg, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_fneg_v2f16:
 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
 ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VAL]]
-define amdgpu_kernel void @v_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.in = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %in, i32 %tid
-  %val = load <2 x half>, <2 x half> addrspace(1)* %gep.in, align 2
+  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
+  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
-  store <2 x half> %fneg, <2 x half> addrspace(1)* %gep.out
+  store <2 x half> %fneg, ptr addrspace(1) %gep.out
   ret void
 }
 
 ; GCN-LABEL: {{^}}fneg_free_v2f16:
 ; GCN: s_load_dword [[VAL:s[0-9]+]]
 ; GCN: s_xor_b32 s{{[0-9]+}}, [[VAL]], 0x80008000
-define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
+define amdgpu_kernel void @fneg_free_v2f16(ptr addrspace(1) %out, i32 %in) #0 {
   %bc = bitcast i32 %in to <2 x half>
   %fsub = fsub <2 x half> <half -0.0, half -0.0>, %bc
-  store <2 x half> %fsub, <2 x half> addrspace(1)* %out
+  store <2 x half> %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -118,11 +118,11 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
 
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}}
-define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
+define amdgpu_kernel void @v_fneg_fold_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %val = load <2 x half>, ptr addrspace(1) %in
   %fsub = fsub <2 x half> <half -0.0, half -0.0>, %val
   %fmul = fmul <2 x half> %fsub, %val
-  store <2 x half> %fmul, <2 x half> addrspace(1)* %out
+  store <2 x half> %fmul, ptr addrspace(1) %out
   ret void
 }
 
@@ -135,16 +135,16 @@ define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x
 ; GFX89-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
 ; GFX89-DAG: v_sub_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 
-define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
+define amdgpu_kernel void @v_extract_fneg_fold_v2f16(ptr addrspace(1) %in) #0 {
+  %val = load <2 x half>, ptr addrspace(1) %in
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
   %elt0 = extractelement <2 x half> %fneg, i32 0
   %elt1 = extractelement <2 x half> %fneg, i32 1
 
   %fmul0 = fmul half %elt0, 4.0
   %fadd1 = fadd half %elt1, 2.0
-  store volatile half %fmul0, half addrspace(1)* undef
-  store volatile half %fadd1, half addrspace(1)* undef
+  store volatile half %fmul0, ptr addrspace(1) undef
+  store volatile half %fadd1, ptr addrspace(1) undef
   ret void
 }
 
@@ -153,13 +153,13 @@ define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %i
 ; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]]
 ; CIVI: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
 ; GFX9: global_store_short_d16_hi v{{\[[0-9]+:[0-9]+\]}}, [[NEG]], off
-define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
+define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(ptr addrspace(1) %in) #0 {
+  %val = load <2 x half>, ptr addrspace(1) %in
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
   %elt0 = extractelement <2 x half> %fneg, i32 0
   %elt1 = extractelement <2 x half> %fneg, i32 1
-  store volatile half %elt0, half addrspace(1)* undef
-  store volatile half %elt1, half addrspace(1)* undef
+  store volatile half %elt0, ptr addrspace(1) undef
+  store volatile half %elt1, ptr addrspace(1) undef
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
index d35a81c2afffa..c001d35feabcc 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f64.ll
@@ -3,18 +3,18 @@
 
 ; FUNC-LABEL: {{^}}fneg_f64:
 ; GCN: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-define amdgpu_kernel void @fneg_f64(double addrspace(1)* %out, double %in) {
+define amdgpu_kernel void @fneg_f64(ptr addrspace(1) %out, double %in) {
   %fneg = fsub double -0.000000e+00, %in
-  store double %fneg, double addrspace(1)* %out
+  store double %fneg, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}fneg_v2f64:
 ; GCN: s_xor_b32
 ; GCN: s_xor_b32
-define amdgpu_kernel void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
+define amdgpu_kernel void @fneg_v2f64(ptr addrspace(1) nocapture %out, <2 x double> %in) {
   %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
-  store <2 x double> %fneg, <2 x double> addrspace(1)* %out
+  store <2 x double> %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -28,9 +28,9 @@ define amdgpu_kernel void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out,
 ; GCN: s_xor_b32
 ; GCN: s_xor_b32
 ; GCN: s_xor_b32
-define amdgpu_kernel void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
+define amdgpu_kernel void @fneg_v4f64(ptr addrspace(1) nocapture %out, <4 x double> %in) {
   %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
-  store <4 x double> %fneg, <4 x double> addrspace(1)* %out
+  store <4 x double> %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -40,10 +40,10 @@ define amdgpu_kernel void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out,
 
 ; FUNC-LABEL: {{^}}fneg_free_f64:
 ; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -{{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
-define amdgpu_kernel void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
+define amdgpu_kernel void @fneg_free_f64(ptr addrspace(1) %out, i64 %in) {
   %bc = bitcast i64 %in to double
   %fsub = fsub double 0.0, %bc
-  store double %fsub, double addrspace(1)* %out
+  store double %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -52,9 +52,9 @@ define amdgpu_kernel void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
 ; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
 ; GCN-NOT: xor
 ; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
-define amdgpu_kernel void @fneg_fold_f64(double addrspace(1)* %out, [8 x i32], double %in) {
+define amdgpu_kernel void @fneg_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in) {
   %fsub = fsub double -0.0, %in
   %fmul = fmul double %fsub, %in
-  store double %fmul, double addrspace(1)* %out
+  store double %fmul, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll
index 291ca5f8fe57c..eeba9760481a2 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -8,9 +8,9 @@
 ; GCN: s_load_dword [[VAL:s[0-9]+]]
 ; GCN: s_xor_b32 [[NEG_VAL:s[0-9]+]], [[VAL]], 0x80000000
 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, [[NEG_VAL]]
-define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @s_fneg_f32(ptr addrspace(1) %out, float %in) {
   %fneg = fsub float -0.000000e+00, %in
-  store float %fneg, float addrspace(1)* %out
+  store float %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -20,9 +20,9 @@ define amdgpu_kernel void @s_fneg_f32(float addrspace(1)* %out, float %in) {
 
 ; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
 ; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
-define amdgpu_kernel void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
+define amdgpu_kernel void @s_fneg_v2f32(ptr addrspace(1) nocapture %out, <2 x float> %in) {
   %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
-  store <2 x float> %fneg, <2 x float> addrspace(1)* %out
+  store <2 x float> %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -36,9 +36,9 @@ define amdgpu_kernel void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out
 ; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
 ; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
 ; GCN: s_xor_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x80000000
-define amdgpu_kernel void @s_fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
+define amdgpu_kernel void @s_fneg_v4f32(ptr addrspace(1) nocapture %out, <4 x float> %in) {
   %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
-  store <4 x float> %fneg, <4 x float> addrspace(1)* %out
+  store <4 x float> %fneg, ptr addrspace(1) %out
   ret void
 }
 
@@ -52,10 +52,10 @@ define amdgpu_kernel void @s_fneg_v4f32(<4 x float> addrspace(1)* nocapture %out
 
 ; R600-NOT: XOR
 ; R600: -KC0[2].Z
-define amdgpu_kernel void @fsub0_f32(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @fsub0_f32(ptr addrspace(1) %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fsub = fsub float 0.0, %bc
-  store float %fsub, float addrspace(1)* %out
+  store float %fsub, ptr addrspace(1) %out
   ret void
 }
 ; FUNC-LABEL: {{^}}fneg_free_f32:
@@ -68,10 +68,10 @@ define amdgpu_kernel void @fsub0_f32(float addrspace(1)* %out, i32 %in) {
 
 ; R600-NOT: XOR
 ; R600: -PV.W
-define amdgpu_kernel void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @fneg_free_f32(ptr addrspace(1) %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fsub = fsub float -0.0, %bc
-  store float %fsub, float addrspace(1)* %out
+  store float %fsub, ptr addrspace(1) %out
   ret void
 }
 
@@ -80,21 +80,21 @@ define amdgpu_kernel void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
 ; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; GCN-NOT: xor
 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
-define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fneg_fold_f32(ptr addrspace(1) %out, float %in) {
   %fsub = fsub float -0.0, %in
   %fmul = fmul float %fsub, %in
-  store float %fmul, float addrspace(1)* %out
+  store float %fmul, ptr addrspace(1) %out
   ret void
 }
 
 ; Make sure we turn some integer operations back into fabs
 ; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:
 ; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0
-define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @bitpreserve_fneg_f32(ptr addrspace(1) %out, float %in) {
   %in.bc = bitcast float %in to i32
   %int.abs = xor i32 %in.bc, 2147483648
   %bc = bitcast i32 %int.abs to float
   %fadd = fmul float %bc, 4.0
-  store float %fadd, float addrspace(1)* %out
+  store float %fadd, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index d441654318722..32dd8bfc2de22 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -9,33 +9,33 @@ declare double @llvm.fabs.f64(double) #1
 ; GCN: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
 ; GCN-NOT: v_cmp
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) #0 {
   %fabs = tail call float @llvm.fabs.f32(float %x) #1
   %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
   %ext = zext i1 %cmp to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_not_isinf_pattern_0:
 ; GCN-NOT: v_cmp_class
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
   %fabs = tail call float @llvm.fabs.f32(float %x) #1
   %cmp = fcmp ueq float %fabs, 0x7FF0000000000000
   %ext = zext i1 %cmp to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_not_isinf_pattern_1:
 ; GCN-NOT: v_cmp_class
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
   %fabs = tail call float @llvm.fabs.f32(float %x) #1
   %cmp = fcmp oeq float %fabs, 0xFFF0000000000000
   %ext = zext i1 %cmp to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -45,13 +45,13 @@ define amdgpu_kernel void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture
 ; GCN: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
 ; GCN-NOT: v_cmp
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -61,11 +61,11 @@ define amdgpu_kernel void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %
 ; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
 ; SI-NOT: v_cmp
 ; SI: s_endpgm
-define amdgpu_kernel void @test_isfinite_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #3
   %cmpinf = fcmp one float %x.fabs, 0x7FF0000000000000
   %ext = zext i1 %cmpinf to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -73,13 +73,13 @@ define amdgpu_kernel void @test_isfinite_pattern_1(i32 addrspace(1)* nocapture %
 ; GCN-LABEL: {{^}}test_isfinite_not_pattern_0:
 ; GCN-NOT: v_cmp_class_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -87,12 +87,12 @@ define amdgpu_kernel void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocaptu
 ; GCN-LABEL: {{^}}test_isfinite_not_pattern_1:
 ; GCN-NOT: v_cmp_class_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %ninf = fcmp une float %x, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -100,13 +100,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocaptu
 ; GCN-LABEL: {{^}}test_isfinite_not_pattern_2:
 ; GCN-NOT: v_cmp_class_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocapture %out, float %x, float %y) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
   %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -114,13 +114,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocaptu
 ; GCN-LABEL: {{^}}test_isfinite_not_pattern_3:
 ; GCN-NOT: v_cmp_class_f32
 ; GCN: s_endpgm
-define amdgpu_kernel void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp uno float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -129,13 +129,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocaptu
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1f8
 ; GCN-DAG: v_cmp_class_f32_e32 vcc, [[X]], [[K]]
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
-define amdgpu_kernel void @test_isfinite_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -144,13 +144,13 @@ define amdgpu_kernel void @test_isfinite_pattern_4(i32 addrspace(1)* nocapture %
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1f8
 ; GCN-DAG: v_cmp_class_f32_e32 vcc, [[X]], [[K]]
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
-define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(i32 addrspace(1)* nocapture %out, float %x) #0 {
+define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) nocapture %out, float %x) #0 {
   %ord = fcmp ord float %x, 0.000000e+00
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ninf, %ord
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -170,13 +170,13 @@ define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(i32 addrspace(1)*
 ; VI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP]], vcc
 
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[AND]]
-define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(i32 addrspace(1)* nocapture %out, float %x, [8 x i32], float %y) #0 {
+define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrspace(1) nocapture %out, float %x, [8 x i32], float %y) #0 {
   %ord = fcmp ord float %x, %y
   %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
   %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
   %and = and i1 %ord, %ninf
   %ext = zext i1 %and to i32
-  store i32 %ext, i32 addrspace(1)* %out, align 4
+  store i32 %ext, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
index 2df8388524cbe..278684467ed07 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
@@ -27,11 +27,11 @@ declare double @llvm.floor.f64(double) #0
 ; GCN-UNSAFE: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
 
 ; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
-  %x = load double, double addrspace(1)* %src
+define amdgpu_kernel void @fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load double, ptr addrspace(1) %src
   %floor.x = call double @llvm.floor.f64(double %x)
   %fract = fsub double %x, %floor.x
-  store double %fract, double addrspace(1)* %out
+  store double %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -54,12 +54,12 @@ define amdgpu_kernel void @fract_f64(double addrspace(1)* %out, double addrspace
 ; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -[[X]]
 
 ; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
-  %x = load double, double addrspace(1)* %src
+define amdgpu_kernel void @fract_f64_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load double, ptr addrspace(1) %src
   %neg.x = fsub double -0.0, %x
   %floor.neg.x = call double @llvm.floor.f64(double %neg.x)
   %fract = fsub double %neg.x, %floor.neg.x
-  store double %fract, double addrspace(1)* %out
+  store double %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -82,13 +82,13 @@ define amdgpu_kernel void @fract_f64_neg(double addrspace(1)* %out, double addrs
 ; GCN-UNSAFE: v_fract_f64_e64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|[[X]]|
 
 ; GCN: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
-  %x = load double, double addrspace(1)* %src
+define amdgpu_kernel void @fract_f64_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load double, ptr addrspace(1) %src
   %abs.x = call double @llvm.fabs.f64(double %x)
   %neg.abs.x = fsub double -0.0, %abs.x
   %floor.neg.abs.x = call double @llvm.floor.f64(double %neg.abs.x)
   %fract = fsub double %neg.abs.x, %floor.neg.abs.x
-  store double %fract, double addrspace(1)* %out
+  store double %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -98,12 +98,12 @@ define amdgpu_kernel void @fract_f64_neg_abs(double addrspace(1)* %out, double a
 ; VI-UNSAFE-DAG: v_fract_f64_e32 [[FRACT:v\[[0-9]+:[0-9]+\]]], [[X]]
 ; VI-UNSAFE: buffer_store_dwordx2 [[FLOOR]]
 ; VI-UNSAFE: buffer_store_dwordx2 [[FRACT]]
-define amdgpu_kernel void @multi_use_floor_fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 {
-  %x = load double, double addrspace(1)* %src
+define amdgpu_kernel void @multi_use_floor_fract_f64(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load double, ptr addrspace(1) %src
   %floor.x = call double @llvm.floor.f64(double %x)
   %fract = fsub double %x, %floor.x
-  store volatile double %floor.x, double addrspace(1)* %out
-  store volatile double %fract, double addrspace(1)* %out
+  store volatile double %floor.x, ptr addrspace(1) %out
+  store volatile double %fract, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fract.ll b/llvm/test/CodeGen/AMDGPU/fract.ll
index 5c94d327bf28f..bf9b4b63e31d9 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.ll
@@ -14,11 +14,11 @@ declare float @llvm.floor.f32(float) #0
 ; GCN-UNSAFE: v_fract_f32_e32 [[RESULT:v[0-9]+]], [[INPUT:v[0-9]+]]
 
 ; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
-  %x = load float, float addrspace(1)* %src
+define amdgpu_kernel void @fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load float, ptr addrspace(1) %src
   %floor.x = call float @llvm.floor.f32(float %x)
   %fract = fsub float %x, %floor.x
-  store float %fract, float addrspace(1)* %out
+  store float %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,12 +29,12 @@ define amdgpu_kernel void @fract_f32(float addrspace(1)* %out, float addrspace(1
 ; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -[[INPUT:v[0-9]+]]
 
 ; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
-  %x = load float, float addrspace(1)* %src
+define amdgpu_kernel void @fract_f32_neg(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load float, ptr addrspace(1) %src
   %x.neg = fsub float -0.0, %x
   %floor.x.neg = call float @llvm.floor.f32(float %x.neg)
   %fract = fsub float %x.neg, %floor.x.neg
-  store float %fract, float addrspace(1)* %out
+  store float %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -45,13 +45,13 @@ define amdgpu_kernel void @fract_f32_neg(float addrspace(1)* %out, float addrspa
 ; GCN-UNSAFE: v_fract_f32_e64 [[RESULT:v[0-9]+]], -|[[INPUT:v[0-9]+]]|
 
 ; GCN: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
-  %x = load float, float addrspace(1)* %src
+define amdgpu_kernel void @fract_f32_neg_abs(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load float, ptr addrspace(1) %src
   %abs.x = call float @llvm.fabs.f32(float %x)
   %neg.abs.x = fsub float -0.0, %abs.x
   %floor.neg.abs.x = call float @llvm.floor.f32(float %neg.abs.x)
   %fract = fsub float %neg.abs.x, %floor.neg.abs.x
-  store float %fract, float addrspace(1)* %out
+  store float %fract, ptr addrspace(1) %out
   ret void
 }
 
@@ -61,12 +61,12 @@ define amdgpu_kernel void @fract_f32_neg_abs(float addrspace(1)* %out, float add
 
 ; GCN-UNSAFE: buffer_store_dword [[FLOOR]]
 ; GCN-UNSAFE: buffer_store_dword [[FRACT]]
-define amdgpu_kernel void @multi_use_floor_fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 {
-  %x = load float, float addrspace(1)* %src
+define amdgpu_kernel void @multi_use_floor_fract_f32(ptr addrspace(1) %out, ptr addrspace(1) %src) #1 {
+  %x = load float, ptr addrspace(1) %src
   %floor.x = call float @llvm.floor.f32(float %x)
   %fract = fsub float %x, %floor.x
-  store volatile float %floor.x, float addrspace(1)* %out
-  store volatile float %fract, float addrspace(1)* %out
+  store volatile float %floor.x, ptr addrspace(1) %out
+  store volatile float %fract, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 186757e4c5d84..a53d3956522ab 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -3,19 +3,19 @@
 
 ; FUNC-LABEL: {{^}}v_safe_fsqrt_f64:
 ; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_safe_fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %r0 = load double, double addrspace(1)* %in
+define amdgpu_kernel void @v_safe_fsqrt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+  %r0 = load double, ptr addrspace(1) %in
   %r1 = call double @llvm.sqrt.f64(double %r0)
-  store double %r1, double addrspace(1)* %out
+  store double %r1, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f64:
 ; GCN: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @v_unsafe_fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #2 {
-  %r0 = load double, double addrspace(1)* %in
+define amdgpu_kernel void @v_unsafe_fsqrt_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
+  %r0 = load double, ptr addrspace(1) %in
   %r1 = call double @llvm.sqrt.f64(double %r0)
-  store double %r1, double addrspace(1)* %out
+  store double %r1, ptr addrspace(1) %out
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fsqrt.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.ll
index ba5f79d9bf5f5..356eed7b31a71 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.ll
@@ -7,19 +7,19 @@
 
 ; FUNC-LABEL: {{^}}v_safe_fsqrt_f32:
 ; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @v_safe_fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %r0 = load float, float addrspace(1)* %in
+define amdgpu_kernel void @v_safe_fsqrt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+  %r0 = load float, ptr addrspace(1) %in
   %r1 = call float @llvm.sqrt.f32(float %r0)
-  store float %r1, float addrspace(1)* %out
+  store float %r1, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f32:
 ; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @v_unsafe_fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #2 {
-  %r0 = load float, float addrspace(1)* %in
+define amdgpu_kernel void @v_unsafe_fsqrt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
+  %r0 = load float, ptr addrspace(1) %in
   %r1 = call float @llvm.sqrt.f32(float %r0)
-  store float %r1, float addrspace(1)* %out
+  store float %r1, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,10 +29,10 @@ define amdgpu_kernel void @v_unsafe_fsqrt_f32(float addrspace(1)* %out, float ad
 
 ; R600: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[2].Z
 ; R600: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, PS
-define amdgpu_kernel void @s_sqrt_f32(float addrspace(1)* %out, float %in) #1 {
+define amdgpu_kernel void @s_sqrt_f32(ptr addrspace(1) %out, float %in) #1 {
 entry:
   %fdiv = call float @llvm.sqrt.f32(float %in)
-  store float %fdiv, float addrspace(1)* %out
+  store float %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -44,10 +44,10 @@ entry:
 ; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, PS
 ; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
-define amdgpu_kernel void @s_sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
+define amdgpu_kernel void @s_sqrt_v2f32(ptr addrspace(1) %out, <2 x float> %in) #1 {
 entry:
   %fdiv = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
-  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
+  store <2 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
@@ -65,46 +65,46 @@ entry:
 ; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
 ; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[4].X
 ; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
-define amdgpu_kernel void @s_sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 {
+define amdgpu_kernel void @s_sqrt_v4f32(ptr addrspace(1) %out, <4 x float> %in) #1 {
 entry:
   %fdiv = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
-  store <4 x float> %fdiv, <4 x float> addrspace(1)* %out
+  store <4 x float> %fdiv, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}elim_redun_check_neg0:
 ; GCN: v_sqrt_f32_e32
 ; GCN-NOT: v_cndmask
-define amdgpu_kernel void @elim_redun_check_neg0(float addrspace(1)* %out, float %in) #1 {
+define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %in) #1 {
 entry:
   %sqrt = call float @llvm.sqrt.f32(float %in)
   %cmp = fcmp olt float %in, -0.000000e+00
   %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
-  store float %res, float addrspace(1)* %out
+  store float %res, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}elim_redun_check_pos0:
 ; GCN: v_sqrt_f32_e32
 ; GCN-NOT: v_cndmask
-define amdgpu_kernel void @elim_redun_check_pos0(float addrspace(1)* %out, float %in) #1 {
+define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %in) #1 {
 entry:
   %sqrt = call float @llvm.sqrt.f32(float %in)
   %cmp = fcmp olt float %in, 0.000000e+00
   %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
-  store float %res, float addrspace(1)* %out
+  store float %res, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}elim_redun_check_ult:
 ; GCN: v_sqrt_f32_e32
 ; GCN-NOT: v_cndmask
-define amdgpu_kernel void @elim_redun_check_ult(float addrspace(1)* %out, float %in) #1 {
+define amdgpu_kernel void @elim_redun_check_ult(ptr addrspace(1) %out, float %in) #1 {
 entry:
   %sqrt = call float @llvm.sqrt.f32(float %in)
   %cmp = fcmp ult float %in, -0.000000e+00
   %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
-  store float %res, float addrspace(1)* %out
+  store float %res, ptr addrspace(1) %out
   ret void
 }
 
@@ -112,12 +112,12 @@ entry:
 ; GCN: v_sqrt_f32_e32
 ; GCN: v_sqrt_f32_e32
 ; GCN-NOT: v_cndmask
-define amdgpu_kernel void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
+define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float> %in) #1 {
 entry:
   %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
   %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
   %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
-  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  store <2 x float> %res, ptr addrspace(1) %out
   ret void
 }
 
@@ -125,22 +125,22 @@ entry:
 ; GCN: v_sqrt_f32_e32
 ; GCN: v_sqrt_f32_e32
 ; GCN-NOT: v_cndmask
-define amdgpu_kernel void @elim_redun_check_v2_ult(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 {
+define amdgpu_kernel void @elim_redun_check_v2_ult(ptr addrspace(1) %out, <2 x float> %in) #1 {
 entry:
   %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
   %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
   %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
-  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  store <2 x float> %res, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}recip_sqrt:
 ; R600: RECIPSQRT_IEEE
 ; R600-NOT: RECIP_IEEE
-define amdgpu_kernel void @recip_sqrt(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @recip_sqrt(ptr addrspace(1) %out, float %src) nounwind {
   %sqrt = call float @llvm.sqrt.f32(float %src)
   %recipsqrt = fdiv fast float 1.0, %sqrt
-  store float %recipsqrt, float addrspace(1)* %out, align 4
+  store float %recipsqrt, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
index be4de04e2fb62..d3f7f77066c7e 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -13,14 +13,14 @@
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fsub_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
-  %b.val = load volatile half, half addrspace(1)* %b
+  %a.val = load volatile half, ptr addrspace(1) %a
+  %b.val = load volatile half, ptr addrspace(1) %b
   %r.val = fsub half %a.val, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -33,12 +33,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fsub_f16_imm_a(
-    half addrspace(1)* %r,
-    half addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load volatile half, half addrspace(1)* %b
+  %b.val = load volatile half, ptr addrspace(1) %b
   %r.val = fsub half 1.0, %b.val
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -51,12 +51,12 @@ entry:
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @fsub_f16_imm_b(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
+  %a.val = load volatile half, ptr addrspace(1) %a
   %r.val = fsub half %a.val, 2.0
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -95,14 +95,14 @@ entry:
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @fsub_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
   %r.val = fsub <2 x half> %a.val, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -131,12 +131,12 @@ entry:
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @fsub_v2f16_imm_a(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %b) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %b) {
 entry:
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %b.val = load <2 x half>, ptr addrspace(1) %b
   %r.val = fsub <2 x half> <half 1.0, half 2.0>, %b.val
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -165,11 +165,11 @@ entry:
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @fsub_v2f16_imm_b(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = fsub <2 x half> %a.val, <half 2.0, half 1.0>
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 6e4635ec43877..0c42800bf56cf 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -4,12 +4,12 @@
 
 ; FUNC-LABEL: {{^}}v_fsub_f32:
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float, float addrspace(1)* %in, align 4
-  %b = load float, float addrspace(1)* %b_ptr, align 4
+define amdgpu_kernel void @v_fsub_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %a = load float, ptr addrspace(1) %in, align 4
+  %b = load float, ptr addrspace(1) %b_ptr, align 4
   %result = fsub float %a, %b
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -17,9 +17,9 @@ define amdgpu_kernel void @v_fsub_f32(float addrspace(1)* %out, float addrspace(
 ; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
 
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
+define amdgpu_kernel void @s_fsub_f32(ptr addrspace(1) %out, float %a, float %b) {
   %sub = fsub float %a, %b
-  store float %sub, float addrspace(1)* %out, align 4
+  store float %sub, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -29,9 +29,9 @@ define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float
 
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
+define amdgpu_kernel void @fsub_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) {
   %sub = fsub <2 x float> %a, %b
-  store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %sub, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -45,12 +45,12 @@ define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
+define amdgpu_kernel void @v_fsub_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x float>, ptr addrspace(1) %in, align 16
+  %b = load <4 x float>, ptr addrspace(1) %b_ptr, align 16
   %result = fsub <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %result, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -60,48 +60,48 @@ define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x flo
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: s_endpgm
-define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
+define amdgpu_kernel void @s_fsub_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) {
   %result = fsub <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %result, ptr addrspace(1) %out, align 16
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_fneg_fsub_f32:
 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
 ; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float, float addrspace(1)* %in, align 4
-  %b = load float, float addrspace(1)* %b_ptr, align 4
+define amdgpu_kernel void @v_fneg_fsub_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %a = load float, ptr addrspace(1) %in, align 4
+  %b = load float, ptr addrspace(1) %b_ptr, align 4
   %result = fsub float %a, %b
   %neg.result = fsub float -0.0, %result
-  store float %neg.result, float addrspace(1)* %out, align 4
+  store float %neg.result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_f32:
 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
 ; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float, float addrspace(1)* %in, align 4
-  %b = load float, float addrspace(1)* %b_ptr, align 4
+define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %a = load float, ptr addrspace(1) %in, align 4
+  %b = load float, ptr addrspace(1) %b_ptr, align 4
   %result = fsub nsz float %a, %b
   %neg.result = fsub float -0.0, %result
-  store float %neg.result, float addrspace(1)* %out, align 4
+  store float %neg.result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
 ; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float, float addrspace(1)* %in, align 4
-  %b = load float, float addrspace(1)* %b_ptr, align 4
+define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %a = load float, ptr addrspace(1) %in, align 4
+  %b = load float, ptr addrspace(1) %b_ptr, align 4
   %result = fsub float %a, %b
   %neg.result = fsub float -0.0, %result
-  store float %neg.result, float addrspace(1)* %out, align 4
+  store float %neg.result, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -111,22 +111,22 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(float addrspace(1)* %ou
 ; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
 ; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
 ; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float, float addrspace(1)* %in, align 4
-  %b = load float, float addrspace(1)* %b_ptr, align 4
+define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %a = load float, ptr addrspace(1) %in, align 4
+  %b = load float, ptr addrspace(1) %b_ptr, align 4
   %result = fsub float %a, %b
   %neg.result = fsub float -0.0, %result
-  store float %neg.result, float addrspace(1)* %out, align 4
+  store float %neg.result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
 ; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %a = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+  %a = load float, ptr addrspace(1) %in, align 4
   %result = fsub float %a, 0.0
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/fsub64.ll b/llvm/test/CodeGen/AMDGPU/fsub64.ll
index 73f1a69eeb9d6..67267bdd7ebc0 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub64.ll
@@ -5,77 +5,77 @@ declare double @llvm.fabs.f64(double) #0
 
 ; SI-LABEL: {{^}}fsub_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                      double addrspace(1)* %in2) {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
+define amdgpu_kernel void @fsub_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                      ptr addrspace(1) %in2) {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
   %r2 = fsub double %r0, %r1
-  store double %r2, double addrspace(1)* %out
+  store double %r2, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}fsub_fabs_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
-define amdgpu_kernel void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                           double addrspace(1)* %in2) {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
+define amdgpu_kernel void @fsub_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                           ptr addrspace(1) %in2) {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
   %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
   %r2 = fsub double %r0, %r1.fabs
-  store double %r2, double addrspace(1)* %out
+  store double %r2, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}fsub_fabs_inv_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                               double addrspace(1)* %in2) {
-  %r0 = load double, double addrspace(1)* %in1
-  %r1 = load double, double addrspace(1)* %in2
+define amdgpu_kernel void @fsub_fabs_inv_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
+                               ptr addrspace(1) %in2) {
+  %r0 = load double, ptr addrspace(1) %in1
+  %r1 = load double, ptr addrspace(1) %in2
   %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
   %r2 = fsub double %r0.fabs, %r1
-  store double %r2, double addrspace(1)* %out
+  store double %r2, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_fsub_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
+define amdgpu_kernel void @s_fsub_f64(ptr addrspace(1) %out, double %a, double %b) {
   %sub = fsub double %a, %b
-  store double %sub, double addrspace(1)* %out
+  store double %sub, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_fsub_imm_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}, 4.0
-define amdgpu_kernel void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
+define amdgpu_kernel void @s_fsub_imm_f64(ptr addrspace(1) %out, double %a, double %b) {
   %sub = fsub double 4.0, %a
-  store double %sub, double addrspace(1)* %out
+  store double %sub, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}, -4.0
-define amdgpu_kernel void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
+define amdgpu_kernel void @s_fsub_imm_inv_f64(ptr addrspace(1) %out, double %a, double %b) {
   %sub = fsub double %a, 4.0
-  store double %sub, double addrspace(1)* %out
+  store double %sub, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}s_fsub_self_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_fsub_self_f64(double addrspace(1)* %out, double %a) {
+define amdgpu_kernel void @s_fsub_self_f64(ptr addrspace(1) %out, double %a) {
   %sub = fsub double %a, %a
-  store double %sub, double addrspace(1)* %out
+  store double %sub, ptr addrspace(1) %out
   ret void
 }
 
 ; SI-LABEL: {{^}}fsub_v2f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) {
+define amdgpu_kernel void @fsub_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) {
   %sub = fsub <2 x double> %a, %b
-  store <2 x double> %sub, <2 x double> addrspace(1)* %out
+  store <2 x double> %sub, ptr addrspace(1) %out
   ret void
 }
 
@@ -84,12 +84,12 @@ define amdgpu_kernel void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %a = load <4 x double>, <4 x double> addrspace(1)* %in
-  %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr
+define amdgpu_kernel void @fsub_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %b_ptr = getelementptr <4 x double>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x double>, ptr addrspace(1) %in
+  %b = load <4 x double>, ptr addrspace(1) %b_ptr
   %result = fsub <4 x double> %a, %b
-  store <4 x double> %result, <4 x double> addrspace(1)* %out
+  store <4 x double> %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -98,9 +98,9 @@ define amdgpu_kernel void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x doub
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) {
+define amdgpu_kernel void @s_fsub_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) {
   %result = fsub <4 x double> %a, %b
-  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16
+  store <4 x double> %result, ptr addrspace(1) %out, align 16
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index 7f08cb19a5f52..4f3f8c18f5018 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -4,7 +4,7 @@
 
 ; half args should be promoted to float for CI and lower.
 
-define amdgpu_kernel void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
+define amdgpu_kernel void @load_f16_arg(ptr addrspace(1) %out, half %arg) #0 {
 ; CI-LABEL: load_f16_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -26,11 +26,11 @@ define amdgpu_kernel void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
 ; VI-NEXT:    flat_store_short v[0:1], v2
 ; VI-NEXT:    s_endpgm
-  store half %arg, half addrspace(1)* %out
+  store half %arg, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @load_v2f16_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
 ; CI-LABEL: load_v2f16_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -52,11 +52,11 @@ define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x ha
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
-  store <2 x half> %arg, <2 x half> addrspace(1)* %out
+  store <2 x half> %arg, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @load_v3f16_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
 ; GCN-LABEL: load_v3f16_arg:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -72,13 +72,13 @@ define amdgpu_kernel void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x ha
 ; GCN-NEXT:    flat_store_short v[2:3], v4
 ; GCN-NEXT:    flat_store_dword v[0:1], v5
 ; GCN-NEXT:    s_endpgm
-  store <3 x half> %arg, <3 x half> addrspace(1)* %out
+  store <3 x half> %arg, ptr addrspace(1) %out
   ret void
 }
 
 
 ; FIXME: Why not one load?
-define amdgpu_kernel void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @load_v4f16_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
 ; GCN-LABEL: load_v4f16_arg:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -89,11 +89,11 @@ define amdgpu_kernel void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x ha
 ; GCN-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GCN-NEXT:    s_endpgm
-  store <4 x half> %arg, <4 x half> addrspace(1)* %out
+  store <4 x half> %arg, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @load_v8f16_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
 ; CI-LABEL: load_v8f16_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
@@ -121,11 +121,11 @@ define amdgpu_kernel void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x ha
 ; VI-NEXT:    v_mov_b32_e32 v3, s3
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  store <8 x half> %arg, <8 x half> addrspace(1)* %out
+  store <8 x half> %arg, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
+define amdgpu_kernel void @extload_v2f16_arg(ptr addrspace(1) %out, <2 x half> %in) #0 {
 ; CI-LABEL: extload_v2f16_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -152,11 +152,11 @@ define amdgpu_kernel void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-NEXT:    s_endpgm
   %fpext = fpext <2 x half> %in to <2 x float>
-  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
+  store <2 x float> %fpext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
+define amdgpu_kernel void @extload_f16_to_f32_arg(ptr addrspace(1) %out, half %arg) #0 {
 ; CI-LABEL: extload_f16_to_f32_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -179,11 +179,11 @@ define amdgpu_kernel void @extload_f16_to_f32_arg(float addrspace(1)* %out, half
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
   %ext = fpext half %arg to float
-  store float %ext, float addrspace(1)* %out
+  store float %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
 ; CI-LABEL: extload_v2f16_to_v2f32_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s2, s[4:5], 0x2
@@ -210,11 +210,11 @@ define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <2 x half> %arg to <2 x float>
-  store <2 x float> %ext, <2 x float> addrspace(1)* %out
+  store <2 x float> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
 ; CI-LABEL: extload_v3f16_to_v3f32_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -241,11 +241,11 @@ define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <3 x half> %arg to <3 x float>
-  store <3 x float> %ext, <3 x float> addrspace(1)* %out
+  store <3 x float> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
 ; CI-LABEL: extload_v4f16_to_v4f32_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -276,11 +276,11 @@ define amdgpu_kernel void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <4 x half> %arg to <4 x float>
-  store <4 x float> %ext, <4 x float> addrspace(1)* %out
+  store <4 x float> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
 ; CI-LABEL: extload_v8f16_to_v8f32_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -337,11 +337,11 @@ define amdgpu_kernel void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <8 x half> %arg to <8 x float>
-  store <8 x float> %ext, <8 x float> addrspace(1)* %out
+  store <8 x float> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
+define amdgpu_kernel void @extload_f16_to_f64_arg(ptr addrspace(1) %out, half %arg) #0 {
 ; CI-LABEL: extload_f16_to_f64_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x2
@@ -368,11 +368,11 @@ define amdgpu_kernel void @extload_f16_to_f64_arg(double addrspace(1)* %out, hal
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-NEXT:    s_endpgm
   %ext = fpext half %arg to double
-  store double %ext, double addrspace(1)* %out
+  store double %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(ptr addrspace(1) %out, <2 x half> %arg) #0 {
 ; CI-LABEL: extload_v2f16_to_v2f64_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x2
@@ -405,11 +405,11 @@ define amdgpu_kernel void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <2 x half> %arg to <2 x double>
-  store <2 x double> %ext, <2 x double> addrspace(1)* %out
+  store <2 x double> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(ptr addrspace(1) %out, <3 x half> %arg) #0 {
 ; CI-LABEL: extload_v3f16_to_v3f64_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -452,11 +452,11 @@ define amdgpu_kernel void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <3 x half> %arg to <3 x double>
-  store <3 x double> %ext, <3 x double> addrspace(1)* %out
+  store <3 x double> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(ptr addrspace(1) %out, <4 x half> %arg) #0 {
 ; CI-LABEL: extload_v4f16_to_v4f64_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -507,11 +507,11 @@ define amdgpu_kernel void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <4 x half> %arg to <4 x double>
-  store <4 x double> %ext, <4 x double> addrspace(1)* %out
+  store <4 x double> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
+define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(ptr addrspace(1) %out, <8 x half> %arg) #0 {
 ; CI-LABEL: extload_v8f16_to_v8f64_arg:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x4
@@ -606,11 +606,11 @@ define amdgpu_kernel void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)*
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %ext = fpext <8 x half> %arg to <8 x double>
-  store <8 x double> %ext, <8 x double> addrspace(1)* %out
+  store <8 x double> %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_store_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_load_store_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -623,12 +623,12 @@ define amdgpu_kernel void @global_load_store_f16(half addrspace(1)* %out, half a
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_short v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %in
-  store half %val, half addrspace(1)* %out
+  %val = load half, ptr addrspace(1) %in
+  store half %val, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_store_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_load_store_v2f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -641,12 +641,12 @@ define amdgpu_kernel void @global_load_store_v2f16(<2 x half> addrspace(1)* %out
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
-  store <2 x half> %val, <2 x half> addrspace(1)* %out
+  %val = load <2 x half>, ptr addrspace(1) %in
+  store <2 x half> %val, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
+define amdgpu_kernel void @global_load_store_v4f16(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: global_load_store_v4f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -659,12 +659,12 @@ define amdgpu_kernel void @global_load_store_v4f16(<4 x half> addrspace(1)* %in,
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GCN-NEXT:    s_endpgm
-  %val = load <4 x half>, <4 x half> addrspace(1)* %in
-  store <4 x half> %val, <4 x half> addrspace(1)* %out
+  %val = load <4 x half>, ptr addrspace(1) %in
+  store <4 x half> %val, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_load_store_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_load_store_v8f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -677,12 +677,12 @@ define amdgpu_kernel void @global_load_store_v8f16(<8 x half> addrspace(1)* %out
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
-  %val = load <8 x half>, <8 x half> addrspace(1)* %in
-  store <8 x half> %val, <8 x half> addrspace(1)* %out
+  %val = load <8 x half>, ptr addrspace(1) %in
+  store <8 x half> %val, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_extload_f16_to_f32:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -696,13 +696,13 @@ define amdgpu_kernel void @global_extload_f16_to_f32(float addrspace(1)* %out, h
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %in
+  %val = load half, ptr addrspace(1) %in
   %cvt = fpext half %val to float
-  store float %cvt, float addrspace(1)* %out
+  store float %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v2f16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v2f16_to_v2f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -733,13 +733,13 @@ define amdgpu_kernel void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1
 ; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-NEXT:    s_endpgm
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
+  %val = load <2 x half>, ptr addrspace(1) %in
   %cvt = fpext <2 x half> %val to <2 x float>
-  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
+  store <2 x float> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v3f16_to_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v3f16_to_v3f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -772,13 +772,13 @@ define amdgpu_kernel void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1
 ; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI-NEXT:    flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-NEXT:    s_endpgm
-  %val = load <3 x half>, <3 x half> addrspace(1)* %in
+  %val = load <3 x half>, ptr addrspace(1) %in
   %cvt = fpext <3 x half> %val to <3 x float>
-  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
+  store <3 x float> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v4f16_to_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v4f16_to_v4f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -814,13 +814,13 @@ define amdgpu_kernel void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <4 x half>, <4 x half> addrspace(1)* %in
+  %val = load <4 x half>, ptr addrspace(1) %in
   %cvt = fpext <4 x half> %val to <4 x float>
-  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
+  store <4 x float> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v8f16_to_v8f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v8f16_to_v8f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -876,13 +876,13 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1
 ; VI-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
 ; VI-NEXT:    flat_store_dwordx4 v[12:13], v[4:7]
 ; VI-NEXT:    s_endpgm
-  %val = load <8 x half>, <8 x half> addrspace(1)* %in
+  %val = load <8 x half>, ptr addrspace(1) %in
   %cvt = fpext <8 x half> %val to <8 x float>
-  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
+  store <8 x float> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v16f16_to_v16f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v16f16_to_v16f32:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -990,13 +990,13 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f32(<16 x float> addrspac
 ; VI-NEXT:    flat_store_dwordx4 v[20:21], v[12:15]
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <16 x half>, <16 x half> addrspace(1)* %in
+  %val = load <16 x half>, ptr addrspace(1) %in
   %cvt = fpext <16 x half> %val to <16 x float>
-  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
+  store <16 x float> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_f16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_extload_f16_to_f64:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1011,13 +1011,13 @@ define amdgpu_kernel void @global_extload_f16_to_f64(double addrspace(1)* %out,
 ; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
 ; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GCN-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %in
+  %val = load half, ptr addrspace(1) %in
   %cvt = fpext half %val to double
-  store double %cvt, double addrspace(1)* %out
+  store double %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v2f16_to_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v2f16_to_v2f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1052,13 +1052,13 @@ define amdgpu_kernel void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(
 ; VI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <2 x half>, <2 x half> addrspace(1)* %in
+  %val = load <2 x half>, ptr addrspace(1) %in
   %cvt = fpext <2 x half> %val to <2 x double>
-  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
+  store <2 x double> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v3f16_to_v3f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v3f16_to_v3f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1107,13 +1107,13 @@ define amdgpu_kernel void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(
 ; VI-NEXT:    flat_store_dwordx2 v[8:9], v[6:7]
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <3 x half>, <3 x half> addrspace(1)* %in
+  %val = load <3 x half>, ptr addrspace(1) %in
   %cvt = fpext <3 x half> %val to <3 x double>
-  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
+  store <3 x double> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v4f16_to_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v4f16_to_v4f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1167,13 +1167,13 @@ define amdgpu_kernel void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(
 ; VI-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
 ; VI-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <4 x half>, <4 x half> addrspace(1)* %in
+  %val = load <4 x half>, ptr addrspace(1) %in
   %cvt = fpext <4 x half> %val to <4 x double>
-  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
+  store <4 x double> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v8f16_to_v8f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v8f16_to_v8f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1265,13 +1265,13 @@ define amdgpu_kernel void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(
 ; VI-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
 ; VI-NEXT:    flat_store_dwordx4 v[12:13], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <8 x half>, <8 x half> addrspace(1)* %in
+  %val = load <8 x half>, ptr addrspace(1) %in
   %cvt = fpext <8 x half> %val to <8 x double>
-  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
+  store <8 x double> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_extload_v16f16_to_v16f64:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1452,13 +1452,13 @@ define amdgpu_kernel void @global_extload_v16f16_to_v16f64(<16 x double> addrspa
 ; VI-NEXT:    flat_store_dwordx4 v[19:20], v[0:3]
 ; VI-NEXT:    flat_store_dwordx4 v[13:14], v[5:8]
 ; VI-NEXT:    s_endpgm
-  %val = load <16 x half>, <16 x half> addrspace(1)* %in
+  %val = load <16 x half>, ptr addrspace(1) %in
   %cvt = fpext <16 x half> %val to <16 x double>
-  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
+  store <16 x double> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_f32_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: global_truncstore_f32_to_f16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1472,13 +1472,13 @@ define amdgpu_kernel void @global_truncstore_f32_to_f16(half addrspace(1)* %out,
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    flat_store_short v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load float, float addrspace(1)* %in
+  %val = load float, ptr addrspace(1) %in
   %cvt = fptrunc float %val to half
-  store half %cvt, half addrspace(1)* %out
+  store half %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_truncstore_v2f32_to_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1511,13 +1511,13 @@ define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace
 ; VI-NEXT:    v_or_b32_e32 v2, v3, v2
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
-  %val = load <2 x float>, <2 x float> addrspace(1)* %in
+  %val = load <2 x float>, ptr addrspace(1) %in
   %cvt = fptrunc <2 x float> %val to <2 x half>
-  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
+  store <2 x half> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_truncstore_v3f32_to_v3f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1562,13 +1562,13 @@ define amdgpu_kernel void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    flat_store_dword v[0:1], v3
 ; VI-NEXT:    s_endpgm
-  %val = load <3 x float>, <3 x float> addrspace(1)* %in
+  %val = load <3 x float>, ptr addrspace(1) %in
   %cvt = fptrunc <3 x float> %val to <3 x half>
-  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
+  store <3 x half> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_truncstore_v4f32_to_v4f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1608,13 +1608,13 @@ define amdgpu_kernel void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace
 ; VI-NEXT:    v_or_b32_e32 v2, v5, v4
 ; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <4 x float>, <4 x float> addrspace(1)* %in
+  %val = load <4 x float>, ptr addrspace(1) %in
   %cvt = fptrunc <4 x float> %val to <4 x half>
-  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
+  store <4 x half> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_truncstore_v8f32_to_v8f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1680,13 +1680,13 @@ define amdgpu_kernel void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace
 ; VI-NEXT:    v_or_b32_e32 v2, v4, v5
 ; VI-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <8 x float>, <8 x float> addrspace(1)* %in
+  %val = load <8 x float>, ptr addrspace(1) %in
   %cvt = fptrunc <8 x float> %val to <8 x half>
-  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
+  store <8 x half> %cvt, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: global_truncstore_v16f32_to_v16f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1814,14 +1814,14 @@ define amdgpu_kernel void @global_truncstore_v16f32_to_v16f16(<16 x half> addrsp
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
-  %val = load <16 x float>, <16 x float> addrspace(1)* %in
+  %val = load <16 x float>, ptr addrspace(1) %in
   %cvt = fptrunc <16 x float> %val to <16 x half>
-  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
+  store <16 x half> %cvt, ptr addrspace(1) %out
   ret void
 }
 
 ; FIXME: Unsafe math should fold conversions away
-define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
+define amdgpu_kernel void @fadd_f16(ptr addrspace(1) %out, half %a, half %b) #0 {
 ; CI-LABEL: fadd_f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dword s0, s[4:5], 0x2
@@ -1851,11 +1851,11 @@ define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #
 ; VI-NEXT:    flat_store_short v[0:1], v2
 ; VI-NEXT:    s_endpgm
    %add = fadd half %a, %b
-   store half %add, half addrspace(1)* %out, align 4
+   store half %add, ptr addrspace(1) %out, align 4
    ret void
 }
 
-define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v2f16(ptr addrspace(1) %out, <2 x half> %a, <2 x half> %b) #0 {
 ; CI-LABEL: fadd_v2f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1894,11 +1894,11 @@ define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
   %add = fadd <2 x half> %a, %b
-  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
+  store <2 x half> %add, ptr addrspace(1) %out, align 8
   ret void
 }
 
-define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
+define amdgpu_kernel void @fadd_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; CI-LABEL: fadd_v4f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -1954,15 +1954,15 @@ define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; VI-NEXT:    v_or_b32_e32 v0, v0, v3
 ; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
 ; VI-NEXT:    s_endpgm
-  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
-  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
-  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
+  %b_ptr = getelementptr <4 x half>, ptr addrspace(1) %in, i32 1
+  %a = load <4 x half>, ptr addrspace(1) %in, align 16
+  %b = load <4 x half>, ptr addrspace(1) %b_ptr, align 16
   %result = fadd <4 x half> %a, %b
-  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
+  store <4 x half> %result, ptr addrspace(1) %out, align 16
   ret void
 }
 
-define amdgpu_kernel void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
+define amdgpu_kernel void @fadd_v8f16(ptr addrspace(1) %out, <8 x half> %a, <8 x half> %b) #0 {
 ; CI-LABEL: fadd_v8f16:
 ; CI:       ; %bb.0:
 ; CI-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x4
@@ -2063,11 +2063,11 @@ define amdgpu_kernel void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half>
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
   %add = fadd <8 x half> %a, %b
-  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
+  store <8 x half> %add, ptr addrspace(1) %out, align 32
   ret void
 }
 
-define amdgpu_kernel void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: test_bitcast_from_half:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2080,13 +2080,13 @@ define amdgpu_kernel void @test_bitcast_from_half(half addrspace(1)* %in, i16 ad
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_short v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load half, half addrspace(1)* %in
+  %val = load half, ptr addrspace(1) %in
   %val_int = bitcast half %val to i16
-  store i16 %val_int, i16 addrspace(1)* %out
+  store i16 %val_int, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; GCN-LABEL: test_bitcast_to_half:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
@@ -2099,9 +2099,9 @@ define amdgpu_kernel void @test_bitcast_to_half(half addrspace(1)* %out, i16 add
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    flat_store_short v[0:1], v2
 ; GCN-NEXT:    s_endpgm
-  %val = load i16, i16 addrspace(1)* %in
+  %val = load i16, ptr addrspace(1) %in
   %val_fp = bitcast i16 %val to half
-  store half %val_fp, half addrspace(1)* %out
+  store half %val_fp, ptr addrspace(1) %out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index f46aa7736108c..02791d73dda3f 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -521,7 +521,7 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi
   %src2.ext = fpext half %src2 to float
   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
   %cvt.result = fptrunc float %result to half
-  store volatile half %cvt.result, half addrspace(1)* undef
+  store volatile half %cvt.result, ptr addrspace(1) undef
   %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
   %vec.result = insertelement <2 x half> undef, half %clamp, i32 1

diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 61881d48f19fb..50a3bb187c4ac 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -22,18 +22,18 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
+define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -58,27 +58,27 @@ define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float add
 ; GFX10PLUS-FMA-DAG: v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VC]], 0x41200000
 ; GFX940-FMA-DAG:v_fmac_f32_e32 [[VK]], [[VA]], [[VC]]
 ; GCN:          s_endpgm
-define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @madak_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
-  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
+  %in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
+  %in.gep.2 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 2
 
-  %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+  %out.gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %in.gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %in.gep.1, align 4
-  %c = load volatile float, float addrspace(1)* %in.gep.2, align 4
+  %a = load volatile float, ptr addrspace(1) %in.gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %in.gep.1, align 4
+  %c = load volatile float, ptr addrspace(1) %in.gep.2, align 4
 
   %mul0 = fmul float %a, %b
   %mul1 = fmul float %a, %c
   %madak0 = fadd float %mul0, 10.0
   %madak1 = fadd float %mul1, 10.0
 
-  store volatile float %madak0, float addrspace(1)* %out.gep.0, align 4
-  store volatile float %madak1, float addrspace(1)* %out.gep.1, align 4
+  store volatile float %madak0, ptr addrspace(1) %out.gep.0, align 4
+  store volatile float %madak1, ptr addrspace(1) %out.gep.1, align 4
   ret void
 }
 
@@ -87,16 +87,16 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) #0 {
+define amdgpu_kernel void @madak_m_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
 
   %mul = fmul float 4.0, %a
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -115,18 +115,18 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
+define amdgpu_kernel void @madak_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 4.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -142,16 +142,16 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out
 ; GFX940-FMA:   v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
 ; GFX11-MAD:    v_mul_f32_e32 [[VMUL:v[0-9]+]], [[SB]], [[VA]]
 ; GFX11-MAD:    v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
-define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
+define amdgpu_kernel void @s_v_madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -166,16 +166,16 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float
 ; GFX940-FMA:    v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
 ; GFX11-MAD:     v_mul_f32_e32 [[VMUL:v[0-9]+]], [[SB]], [[VA]]
 ; GFX11-MAD:     v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
-define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
+define amdgpu_kernel void @v_s_madak_f32(ptr addrspace(1) noalias %out, float %a, ptr addrspace(1) noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -187,10 +187,10 @@ define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float
 ; GFX940-FMA: v_fmac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
 ; GFX11-MAD: v_dual_mov_b32 {{v[0-9]+}}, 0 :: v_dual_add_f32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
-define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @s_s_madak_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out, align 4
+  store float %madak, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -206,20 +206,20 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
 ; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], |{{v[0-9]+}}|, {{v[0-9]+}}
 ; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
+define amdgpu_kernel void @no_madak_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
   %mul = fmul float %a.fabs, %b
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -235,20 +235,20 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
 ; GFX11-MAD: v_mul_f32_e64 [[VMUL:v[0-9]+]], {{v[0-9]+}}, |{{v[0-9]+}}|
 ; GFX11-MAD: v_add_f32_e32 {{v[0-9]+}}, 0x41200000, [[VMUL]]
 ; GCN:       s_endpgm
-define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
+define amdgpu_kernel void @no_madak_src1_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
-  %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %in.a.gep = getelementptr float, ptr addrspace(1) %in.a, i32 %tid
+  %in.b.gep = getelementptr float, ptr addrspace(1) %in.b, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %in.a.gep, align 4
-  %b = load float, float addrspace(1)* %in.b.gep, align 4
+  %a = load float, ptr addrspace(1) %in.a.gep, align 4
+  %b = load float, ptr addrspace(1) %in.b.gep, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
   %mul = fmul float %a, %b.fabs
   %madak = fadd float %mul, 10.0
-  store float %madak, float addrspace(1)* %out.gep, align 4
+  store float %madak, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -276,15 +276,15 @@ bb:
   br i1 %tmp, label %bb3, label %bb4
 
 bb3:
-  store volatile float 0.0, float addrspace(1)* undef
+  store volatile float 0.0, ptr addrspace(1) undef
   br label %bb4
 
 bb4:
-  %vgpr = load volatile float, float addrspace(1)* undef
+  %vgpr = load volatile float, ptr addrspace(1) undef
   %tmp0 = fmul float %sgpr0, 0.5
   %tmp1 = fadd float %tmp0, 42.0
   %tmp2 = fmul float %tmp1, %vgpr
-  store volatile float %tmp2, float addrspace(1)* undef, align 4
+  store volatile float %tmp2, ptr addrspace(1) undef, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index 28c7831dd341b..51a0a50fbbff5 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -12,18 +12,18 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
-define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -34,27 +34,27 @@ define amdgpu_kernel void @madmk_f32(float addrspace(1)* noalias %out, float add
 ; GCN-DAG: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
 ; GCN-DAG: v_mac_f32_e32 [[VC]], 0x41200000, [[VA]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @madmk_2_use_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
-  %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
+  %in.gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %in.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
+  %in.gep.2 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 2
 
-  %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+  %out.gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %out.gep.1 = getelementptr float, ptr addrspace(1) %in.gep.0, i32 1
 
-  %a = load volatile float, float addrspace(1)* %in.gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %in.gep.1, align 4
-  %c = load volatile float, float addrspace(1)* %in.gep.2, align 4
+  %a = load volatile float, ptr addrspace(1) %in.gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %in.gep.1, align 4
+  %c = load volatile float, ptr addrspace(1) %in.gep.2, align 4
 
   %mul0 = fmul float %a, 10.0
   %mul1 = fmul float %a, 10.0
   %madmk0 = fadd float %mul0, %b
   %madmk1 = fadd float %mul1, %c
 
-  store float %madmk0, float addrspace(1)* %out.gep.0, align 4
-  store float %madmk1, float addrspace(1)* %out.gep.1, align 4
+  store float %madmk0, ptr addrspace(1) %out.gep.0, align 4
+  store float %madmk1, ptr addrspace(1) %out.gep.1, align 4
   ret void
 }
 
@@ -63,18 +63,18 @@ define amdgpu_kernel void @madmk_2_use_f32(float addrspace(1)* noalias %out, flo
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]]
-define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @madmk_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %mul = fmul float %a, 4.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -82,13 +82,13 @@ define amdgpu_kernel void @madmk_inline_imm_f32(float addrspace(1)* noalias %out
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
+define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32], float %a, [8 x i32], float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -98,15 +98,15 @@ define amdgpu_kernel void @s_s_madmk_f32(float addrspace(1)* noalias %out, [8 x
 ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
 ; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
 ; GCN: s_endpgm
-define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) #0 {
+define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.0, align 4
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -114,15 +114,15 @@ define amdgpu_kernel void @v_s_madmk_f32(float addrspace(1)* noalias %out, float
 ; GCN-NOT: v_madmk_f32
 ; GCN: v_mac_f32_e32
 ; GCN: s_endpgm
-define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) #0 {
+define amdgpu_kernel void @scalar_vector_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %a) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %b = load float, float addrspace(1)* %gep.0, align 4
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %b = load float, ptr addrspace(1) %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -131,20 +131,20 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias %
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN-DAG: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[SK]], [[VB]]
-define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @no_madmk_src0_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
   %mul = fmul float %a.fabs, 10.0
   %madmk = fadd float %mul, %b
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -152,20 +152,20 @@ define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalia
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{[sv][0-9]+}}, |{{v[0-9]+}}|
-define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load volatile float, float addrspace(1)* %gep.0, align 4
-  %b = load volatile float, float addrspace(1)* %gep.1, align 4
+  %a = load volatile float, ptr addrspace(1) %gep.0, align 4
+  %b = load volatile float, ptr addrspace(1) %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b.fabs
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 
@@ -173,16 +173,16 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalia
 ; GCN: buffer_load_dword [[A:v[0-9]+]]
 ; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
 ; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
-define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
+define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
 
-  %a = load float, float addrspace(1)* %gep.0, align 4
+  %a = load float, ptr addrspace(1) %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, 2.0
-  store float %madmk, float addrspace(1)* %out.gep, align 4
+  store float %madmk, ptr addrspace(1) %out.gep, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
index b162eff966a1b..6b8a3ef2f06c8 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -9,9 +9,9 @@
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %rcp = fdiv float 1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -21,9 +21,9 @@ define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_ulp25_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %rcp = fdiv float 1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -33,9 +33,9 @@ define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %sr
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %rcp = fdiv fast float 1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -45,9 +45,9 @@ define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, floa
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %rcp = fdiv arcp float 1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -57,9 +57,9 @@ define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, floa
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
+define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(ptr addrspace(1) %out, float %src) #2 {
   %rcp = fdiv float 1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -69,10 +69,10 @@ define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %ou
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %src.fabs = call float @llvm.fabs.f32(float %src)
   %rcp = fdiv float 1.0, %src.fabs, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -82,9 +82,9 @@ define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src
 ; GCN: buffer_store_dword [[RCP]]
 
 ; EG: RECIP_IEEE
-define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @neg_rcp_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %rcp = fdiv float -1.0, %src, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -92,11 +92,11 @@ define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src)
 ; GCN: s_load_dword [[SRC:s[0-9]+]]
 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
 ; GCN: buffer_store_dword [[RCP]]
-define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(ptr addrspace(1) %out, float %src) #0 {
   %src.fabs = call float @llvm.fabs.f32(float %src)
   %src.fabs.fneg = fneg float %src.fabs
   %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -106,24 +106,24 @@ define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float
 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
 ; GCN: buffer_store_dword [[RCP]]
 ; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(ptr addrspace(1) %out, float %src) #0 {
   %src.fabs = call float @llvm.fabs.f32(float %src)
   %src.fabs.fneg = fneg float %src.fabs
   %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0
-  store volatile float %rcp, float addrspace(1)* %out, align 4
+  store volatile float %rcp, ptr addrspace(1) %out, align 4
 
   %other = fmul float %src, %src.fabs.fneg
-  store volatile float %other, float addrspace(1)* %out, align 4
+  store volatile float %other, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], s{{[0-9]+}}, 0.5
 ; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
-  %x = load float, float addrspace(1)* undef
+define amdgpu_kernel void @div_arcp_2_x_pat_f32(ptr addrspace(1) %out) #0 {
+  %x = load float, ptr addrspace(1) undef
   %rcp = fdiv arcp float %x, 2.0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -131,10 +131,10 @@ define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
 ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x3dcccccd
 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], s{{[0-9]+}}, [[V]]
 ; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
-  %x = load float, float addrspace(1)* undef
+define amdgpu_kernel void @div_arcp_k_x_pat_f32(ptr addrspace(1) %out) #0 {
+  %x = load float, ptr addrspace(1) undef
   %rcp = fdiv arcp float %x, 10.0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -142,10 +142,10 @@ define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
 ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0xbdcccccd
 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], s{{[0-9]+}}, [[V]]
 ; GCN: buffer_store_dword [[MUL]]
-define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
-  %x = load float, float addrspace(1)* undef
+define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(ptr addrspace(1) %out) #0 {
+  %x = load float, ptr addrspace(1) undef
   %rcp = fdiv arcp float %x, -10.0
-  store float %rcp, float addrspace(1)* %out, align 4
+  store float %rcp, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
index b95efaf323eda..bde97c9b3cc30 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
@@ -2,41 +2,41 @@
 
 ; GCN-LABEL: {{^}}rcp_uint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_uint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
-  %load = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @rcp_uint(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+  %load = load i32, ptr addrspace(1) %in, align 4
   %cvt = uitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}rcp_sint:
 ; GCN: v_rcp_iflag_f32_e32
-define amdgpu_kernel void @rcp_sint(i32 addrspace(1)* %in, float addrspace(1)* %out) #0 {
-  %load = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @rcp_sint(ptr addrspace(1) %in, ptr addrspace(1) %out) #0 {
+  %load = load i32, ptr addrspace(1) %in, align 4
   %cvt = sitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}rcp_uint_denorm:
 ; GCN-NOT: v_rcp_iflag_f32
-define amdgpu_kernel void @rcp_uint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
-  %load = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @rcp_uint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) #1 {
+  %load = load i32, ptr addrspace(1) %in, align 4
   %cvt = uitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}rcp_sint_denorm:
 ; GCN-NOT: v_rcp_iflag_f32
-define amdgpu_kernel void @rcp_sint_denorm(i32 addrspace(1)* %in, float addrspace(1)* %out) #1 {
-  %load = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @rcp_sint_denorm(ptr addrspace(1) %in, ptr addrspace(1) %out) #1 {
+  %load = load i32, ptr addrspace(1) %in, align 4
   %cvt = sitofp i32 %load to float
   %div = fdiv float 1.000000e+00, %cvt
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/rsq.ll b/llvm/test/CodeGen/AMDGPU/rsq.ll
index a6325388c3fe4..5f4a888e1a0ee 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.ll
@@ -8,11 +8,11 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
 ; SI-LABEL: {{^}}rsq_f32:
 ; SI: v_rsq_f32_e32
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
-  %val = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load float, ptr addrspace(1) %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -20,21 +20,21 @@ define amdgpu_kernel void @rsq_f32(float addrspace(1)* noalias %out, float addrs
 ; SI: v_sqrt_f64
 ; SI: v_rcp_f64
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
-  %val = load double, double addrspace(1)* %in, align 4
+define amdgpu_kernel void @rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load double, ptr addrspace(1) %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
   %div = fdiv double 1.0, %sqrt
-  store double %div, double addrspace(1)* %out, align 4
+  store double %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; SI-LABEL: {{^}}rsq_f32_sgpr:
 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; SI: s_endpgm
-define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) #0 {
+define amdgpu_kernel void @rsq_f32_sgpr(ptr addrspace(1) noalias %out, float %val) #0 {
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -57,21 +57,21 @@ define amdgpu_kernel void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float
 ; SI-SAFE-NOT: v_rsq_f32
 
 ; SI: s_endpgm
-define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @rsqrt_fmul(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
+  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
+  %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
 
-  %a = load volatile float, float addrspace(1)* %gep.0
-  %b = load volatile float, float addrspace(1)* %gep.1
-  %c = load volatile float, float addrspace(1)* %gep.2
+  %a = load volatile float, ptr addrspace(1) %gep.0
+  %b = load volatile float, ptr addrspace(1) %gep.1
+  %c = load volatile float, ptr addrspace(1) %gep.2
 
   %x = call float @llvm.sqrt.f32(float %a)
   %y = fmul float %x, %b
   %z = fdiv float %c, %y
-  store float %z, float addrspace(1)* %out.gep
+  store float %z, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -83,11 +83,11 @@ define amdgpu_kernel void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(
 ; SI-UNSAFE: v_sqrt_f32_e32 [[SQRT:v[0-9]+]], v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
-  %val = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @neg_rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load float, ptr addrspace(1) %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val)
   %div = fdiv float -1.0, %sqrt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -104,11 +104,11 @@ define amdgpu_kernel void @neg_rsq_f32(float addrspace(1)* noalias %out, float a
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
-define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
-  %val = load double, double addrspace(1)* %in, align 4
+define amdgpu_kernel void @neg_rsq_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load double, ptr addrspace(1) %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val)
   %div = fdiv double -1.0, %sqrt
-  store double %div, double addrspace(1)* %out, align 4
+  store double %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -120,12 +120,12 @@ define amdgpu_kernel void @neg_rsq_f64(double addrspace(1)* noalias %out, double
 ; SI-UNSAFE: v_sqrt_f32_e64 [[SQRT:v[0-9]+]], -v{{[0-9]+}}
 ; SI-UNSAFE: v_rcp_f32_e64 [[RSQ:v[0-9]+]], -[[SQRT]]
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
-define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
-  %val = load float, float addrspace(1)* %in, align 4
+define amdgpu_kernel void @neg_rsq_neg_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load float, ptr addrspace(1) %in, align 4
   %val.fneg = fneg float %val
   %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
   %div = fdiv float -1.0, %sqrt, !fpmath !0
-  store float %div, float addrspace(1)* %out, align 4
+  store float %div, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -142,12 +142,12 @@ define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, flo
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
 ; SI-UNSAFE: v_fma_f64
-define amdgpu_kernel void @neg_rsq_neg_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #0 {
-  %val = load double, double addrspace(1)* %in, align 4
+define amdgpu_kernel void @neg_rsq_neg_f64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
+  %val = load double, ptr addrspace(1) %in, align 4
   %val.fneg = fsub double -0.0, %val
   %sqrt = call double @llvm.sqrt.f64(double %val.fneg)
   %div = fdiv double -1.0, %sqrt
-  store double %div, double addrspace(1)* %out, align 4
+  store double %div, ptr addrspace(1) %out, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 825950b8380dd..879e902594bc8 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -8,75 +8,75 @@
 ; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
 ; GCN: v_mac_f32_e32 [[C]], [[A]], [[B]]
 ; GCN: buffer_store_dword [[C]]
-define amdgpu_kernel void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @mac_vvv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load volatile float, float addrspace(1)* %in
-  %b = load volatile float, float addrspace(1)* %b_ptr
-  %c = load volatile float, float addrspace(1)* %c_ptr
+  %a = load volatile float, ptr addrspace(1) %in
+  %b = load volatile float, ptr addrspace(1) %b_ptr
+  %c = load volatile float, ptr addrspace(1) %c_ptr
 
   %tmp0 = fmul float %a, %b
   %tmp1 = fadd float %tmp0, %c
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mad_inline_sgpr_inline:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]}}, s{{[0-9]+}}, 0.5, 0.5
-define amdgpu_kernel void @mad_inline_sgpr_inline(float addrspace(1)* %out, float %in) #0 {
+define amdgpu_kernel void @mad_inline_sgpr_inline(ptr addrspace(1) %out, float %in) #0 {
 entry:
   %tmp0 = fmul float 0.5, %in
   %tmp1 = fadd float %tmp0, 0.5
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mad_vvs:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @mad_vvs(float addrspace(1)* %out, float addrspace(1)* %in, float %c) #0 {
+define amdgpu_kernel void @mad_vvs(ptr addrspace(1) %out, ptr addrspace(1) %in, float %c) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
 
   %tmp0 = fmul float %a, %b
   %tmp1 = fadd float %tmp0, %c
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mac_ssv:
 ; GCN: v_mac_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @mac_ssv(float addrspace(1)* %out, float addrspace(1)* %in, float %a) #0 {
+define amdgpu_kernel void @mac_ssv(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) #0 {
 entry:
-  %c = load float, float addrspace(1)* %in
+  %c = load float, ptr addrspace(1) %in
 
   %tmp0 = fmul float %a, %a
   %tmp1 = fadd float %tmp0, %c
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mac_mad_same_add:
 ; GCN: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
 ; GCN: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @mac_mad_same_add(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @mac_mad_same_add(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
-  %d_ptr = getelementptr float, float addrspace(1)* %in, i32 3
-  %e_ptr = getelementptr float, float addrspace(1)* %in, i32 4
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
+  %d_ptr = getelementptr float, ptr addrspace(1) %in, i32 3
+  %e_ptr = getelementptr float, ptr addrspace(1) %in, i32 4
 
-  %a = load volatile float, float addrspace(1)* %in
-  %b = load volatile float, float addrspace(1)* %b_ptr
-  %c = load volatile float, float addrspace(1)* %c_ptr
-  %d = load volatile float, float addrspace(1)* %d_ptr
-  %e = load volatile float, float addrspace(1)* %e_ptr
+  %a = load volatile float, ptr addrspace(1) %in
+  %b = load volatile float, ptr addrspace(1) %b_ptr
+  %c = load volatile float, ptr addrspace(1) %c_ptr
+  %d = load volatile float, ptr addrspace(1) %d_ptr
+  %e = load volatile float, ptr addrspace(1) %e_ptr
 
   %tmp0 = fmul float %a, %b
   %tmp1 = fadd float %tmp0, %c
@@ -84,9 +84,9 @@ entry:
   %tmp2 = fmul float %d, %e
   %tmp3 = fadd float %tmp2, %c
 
-  %out1 = getelementptr float, float addrspace(1)* %out, i32 1
-  store float %tmp1, float addrspace(1)* %out
-  store float %tmp3, float addrspace(1)* %out1
+  %out1 = getelementptr float, ptr addrspace(1) %out, i32 1
+  store float %tmp1, ptr addrspace(1) %out
+  store float %tmp3, ptr addrspace(1) %out1
   ret void
 }
 
@@ -96,120 +96,120 @@ entry:
 ; GCN-LABEL: {{^}}mad_neg_src0:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @mad_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_a = fneg float %a
   %tmp0 = fmul float %neg_a, %b
   %tmp1 = fadd float %tmp0, %c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}nsz_mad_sub0_src0:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_a = fsub float 0.0, %a
   %tmp0 = fmul float %neg_a, %b
   %tmp1 = fadd float %tmp0, %c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}safe_mad_sub0_src0:
 ; GCN: v_sub_f32_e32 [[SUB0:v[0-9]+]], 0,
 ; GCN: v_ma{{[cd]}}_f32{{[_e32]*}} v{{[0-9]+}}, [[SUB0]], v{{[0-9]+}}
-define amdgpu_kernel void @safe_mad_sub0_src0(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @safe_mad_sub0_src0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_a = fsub float 0.0, %a
   %tmp0 = fmul float %neg_a, %b
   %tmp1 = fadd float %tmp0, %c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mad_neg_src1:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src1(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @mad_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_b = fneg float %b
   %tmp0 = fmul float %a, %neg_b
   %tmp1 = fadd float %tmp0, %c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}nsz_mad_sub0_src1:
 ; GCN-NOT: v_mac_f32
 ; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
-define amdgpu_kernel void @nsz_mad_sub0_src1(float addrspace(1)* %out, float addrspace(1)* %in) #1 {
+define amdgpu_kernel void @nsz_mad_sub0_src1(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_b = fsub float 0.0, %b
   %tmp0 = fmul float %a, %neg_b
   %tmp1 = fadd float %tmp0, %c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}mad_neg_src2:
 ; GCN-NOT: v_mac
 ; GCN: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
-define amdgpu_kernel void @mad_neg_src2(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @mad_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 entry:
-  %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %c_ptr = getelementptr float, float addrspace(1)* %in, i32 2
+  %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
+  %c_ptr = getelementptr float, ptr addrspace(1) %in, i32 2
 
-  %a = load float, float addrspace(1)* %in
-  %b = load float, float addrspace(1)* %b_ptr
-  %c = load float, float addrspace(1)* %c_ptr
+  %a = load float, ptr addrspace(1) %in
+  %b = load float, ptr addrspace(1) %b_ptr
+  %c = load float, ptr addrspace(1) %c_ptr
 
   %neg_c = fneg float %c
   %tmp0 = fmul float %a, %b
   %tmp1 = fadd float %tmp0, %neg_c
 
-  store float %tmp1, float addrspace(1)* %out
+  store float %tmp1, ptr addrspace(1) %out
   ret void
 }
 
@@ -222,15 +222,15 @@ entry:
 
 ; GCN: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
 ; GCN: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) #3 {
+define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #3 {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext
-  %gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
-  %tmp = load volatile float, float addrspace(1)* %gep.a
-  %tmp1 = load volatile float, float addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds float, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds float, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.out = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
+  %tmp = load volatile float, ptr addrspace(1) %gep.a
+  %tmp1 = load volatile float, ptr addrspace(1) %gep.b
   %tmp2 = fadd float %tmp, %tmp
   %tmp3 = fmul float %tmp2, 4.0
   %tmp4 = fsub float 1.0, %tmp3
@@ -240,7 +240,7 @@ bb:
   %tmp8 = fsub float 1.0, %tmp7
   %tmp9 = fmul float %tmp8, 8.0
   %tmp10 = fadd float %tmp5, %tmp9
-  store float %tmp10, float addrspace(1)* %gep.out
+  store float %tmp10, ptr addrspace(1) %gep.out
   ret void
 }
 
@@ -257,15 +257,15 @@ bb:
 
 ; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
 ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f16(half addrspace(1)* %out, half addrspace(1)* %a, half addrspace(1)* %b) #3 {
+define amdgpu_kernel void @fold_inline_imm_into_mac_src2_f16(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #3 {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
-  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
-  %gep.out = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
-  %tmp = load volatile half, half addrspace(1)* %gep.a
-  %tmp1 = load volatile half, half addrspace(1)* %gep.b
+  %gep.a = getelementptr inbounds half, ptr addrspace(1) %a, i64 %tid.ext
+  %gep.b = getelementptr inbounds half, ptr addrspace(1) %b, i64 %tid.ext
+  %gep.out = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
+  %tmp = load volatile half, ptr addrspace(1) %gep.a
+  %tmp1 = load volatile half, ptr addrspace(1) %gep.b
   %tmp2 = fadd half %tmp, %tmp
   %tmp3 = fmul half %tmp2, 4.0
   %tmp4 = fsub half 1.0, %tmp3
@@ -275,7 +275,7 @@ bb:
   %tmp8 = fsub half 1.0, %tmp7
   %tmp9 = fmul half %tmp8, 8.0
   %tmp10 = fadd half %tmp5, %tmp9
-  store half %tmp10, half addrspace(1)* %gep.out
+  store half %tmp10, ptr addrspace(1) %gep.out
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index 86f67e085cd1c..15184f1cda8c7 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -15,19 +15,19 @@
 ; VI:  buffer_store_short v[[C_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -39,19 +39,19 @@ entry:
 ; VI:  v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_f16_same_add(
-    half addrspace(1)* %r0,
-    half addrspace(1)* %r1,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c,
-    half addrspace(1)* %d,
-    half addrspace(1)* %e) #0 {
+    ptr addrspace(1) %r0,
+    ptr addrspace(1) %r1,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c,
+    ptr addrspace(1) %d,
+    ptr addrspace(1) %e) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
-  %d.val = load half, half addrspace(1)* %d
-  %e.val = load half, half addrspace(1)* %e
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
+  %d.val = load half, ptr addrspace(1) %d
+  %e.val = load half, ptr addrspace(1) %e
 
   %t0.val = fmul half %a.val, %b.val
   %r0.val = fadd half %t0.val, %c.val
@@ -59,8 +59,8 @@ entry:
   %t1.val = fmul half %d.val, %e.val
   %r1.val = fadd half %t1.val, %c.val
 
-  store half %r0.val, half addrspace(1)* %r0
-  store half %r1.val, half addrspace(1)* %r1
+  store half %r0.val, ptr addrspace(1) %r0
+  store half %r1.val, ptr addrspace(1) %r1
   ret void
 }
 
@@ -74,20 +74,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_a(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %a.neg = fneg half %a.val
   %t.val = fmul half %a.neg, %b.val
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -101,20 +101,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_b(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %b.neg = fneg half %b.val
   %t.val = fmul half %a.val, %b.neg
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -128,20 +128,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_c(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %c.neg = fneg half %c.val
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, %c.neg
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -152,20 +152,20 @@ entry:
 ; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_f16_neg_a_safe_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %a.neg = fsub half 0.0, %a.val
   %t.val = fmul half %a.neg, %b.val
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -176,20 +176,20 @@ entry:
 ; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_f16_neg_b_safe_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %b.neg = fsub half 0.0, %b.val
   %t.val = fmul half %a.val, %b.neg
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -200,20 +200,20 @@ entry:
 ; VI:  v_mac_f16_e32 v[[NEG_A]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_f16_neg_c_safe_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %c.neg = fsub half 0.0, %c.val
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, %c.neg
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -227,20 +227,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_a_nsz_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %a.neg = fsub half 0.0, %a.val
   %t.val = fmul half %a.neg, %b.val
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -254,20 +254,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_b_nsz_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %b.neg = fsub half 0.0, %b.val
   %t.val = fmul half %a.val, %b.neg
   %r.val = fadd half %t.val, %c.val
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -281,20 +281,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_f16_neg_c_nsz_fp_math(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
-  %c.val = load half, half addrspace(1)* %c
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
+  %c.val = load half, ptr addrspace(1) %c
 
   %c.neg = fsub half 0.0, %c.val
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, %c.neg
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -333,21 +333,21 @@ entry:
 ; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_v2f16(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  %a.val = load <2 x half>, ptr addrspace(1) %a
   call void @llvm.amdgcn.s.barrier() #2
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  %b.val = load <2 x half>, ptr addrspace(1) %b
   call void @llvm.amdgcn.s.barrier() #2
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %t.val = fmul <2 x half> %a.val, %b.val
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -364,19 +364,19 @@ entry:
 
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_v2f16_same_add(
-    <2 x half> addrspace(1)* %r0,
-    <2 x half> addrspace(1)* %r1,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c,
-    <2 x half> addrspace(1)* %d,
-    <2 x half> addrspace(1)* %e) #0 {
+    ptr addrspace(1) %r0,
+    ptr addrspace(1) %r1,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c,
+    ptr addrspace(1) %d,
+    ptr addrspace(1) %e) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
-  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
-  %e.val = load <2 x half>, <2 x half> addrspace(1)* %e
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
+  %d.val = load <2 x half>, ptr addrspace(1) %d
+  %e.val = load <2 x half>, ptr addrspace(1) %e
 
   %t0.val = fmul <2 x half> %a.val, %b.val
   %r0.val = fadd <2 x half> %t0.val, %c.val
@@ -384,8 +384,8 @@ entry:
   %t1.val = fmul <2 x half> %d.val, %e.val
   %r1.val = fadd <2 x half> %t1.val, %c.val
 
-  store <2 x half> %r0.val, <2 x half> addrspace(1)* %r0
-  store <2 x half> %r1.val, <2 x half> addrspace(1)* %r1
+  store <2 x half> %r0.val, ptr addrspace(1) %r0
+  store <2 x half> %r1.val, ptr addrspace(1) %r1
   ret void
 }
 
@@ -401,20 +401,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_a(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %a.neg = fneg <2 x half> %a.val
   %t.val = fmul <2 x half> %a.neg, %b.val
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -430,20 +430,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_b(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %b.neg = fneg <2 x half> %b.val
   %t.val = fmul <2 x half> %a.val, %b.neg
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -463,20 +463,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_c(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %c.neg = fneg <2 x half> %c.val
   %t.val = fmul <2 x half> %a.val, %b.val
   %r.val = fadd <2 x half> %t.val, %c.neg
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -495,20 +495,20 @@ entry:
 
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_a_safe_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
   %t.val = fmul <2 x half> %a.neg, %b.val
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -527,20 +527,20 @@ entry:
 
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_b_safe_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
   %t.val = fmul <2 x half> %a.val, %b.neg
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -559,20 +559,20 @@ entry:
 
 ; GCN: s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_c_safe_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
   %t.val = fmul <2 x half> %a.val, %b.val
   %r.val = fadd <2 x half> %t.val, %c.neg
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -592,20 +592,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_a_nsz_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
   %t.val = fmul <2 x half> %a.neg, %b.val
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -625,20 +625,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_b_nsz_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
   %t.val = fmul <2 x half> %a.val, %b.neg
   %r.val = fadd <2 x half> %t.val, %c.val
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -658,20 +658,20 @@ entry:
 ; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
 ; GCN:    s_endpgm
 define amdgpu_kernel void @mac_v2f16_neg_c_nsz_fp_math(
-    <2 x half> addrspace(1)* %r,
-    <2 x half> addrspace(1)* %a,
-    <2 x half> addrspace(1)* %b,
-    <2 x half> addrspace(1)* %c) #1 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #1 {
 entry:
-  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
-  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
-  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
+  %a.val = load <2 x half>, ptr addrspace(1) %a
+  %b.val = load <2 x half>, ptr addrspace(1) %b
+  %c.val = load <2 x half>, ptr addrspace(1) %c
 
   %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
   %t.val = fmul <2 x half> %a.val, %b.val
   %r.val = fadd <2 x half> %t.val, %c.neg
 
-  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
+  store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
index f89db580af945..433368bf616eb 100644
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -50,17 +50,17 @@ define amdgpu_kernel void @madak_f16(
 ; VI-NEXT:    v_madak_f16 v0, v0, v1, 0x4900
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
-    half addrspace(1)* %r,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b) #0 {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b) #0 {
 entry:
-  %a.val = load half, half addrspace(1)* %a
-  %b.val = load half, half addrspace(1)* %b
+  %a.val = load half, ptr addrspace(1) %a
+  %b.val = load half, ptr addrspace(1) %b
 
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, 10.0
 
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
 
@@ -149,23 +149,23 @@ define amdgpu_kernel void @madak_f16_use_2(
 ; VI-NEXT:    buffer_store_short v1, off, s[0:3], 0
 ; VI-NEXT:    buffer_store_short v3, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
-    half addrspace(1)* %r0,
-    half addrspace(1)* %r1,
-    half addrspace(1)* %a,
-    half addrspace(1)* %b,
-    half addrspace(1)* %c) #0 {
+    ptr addrspace(1) %r0,
+    ptr addrspace(1) %r1,
+    ptr addrspace(1) %a,
+    ptr addrspace(1) %b,
+    ptr addrspace(1) %c) #0 {
 entry:
-  %a.val = load volatile half, half addrspace(1)* %a
-  %b.val = load volatile half, half addrspace(1)* %b
-  %c.val = load volatile half, half addrspace(1)* %c
+  %a.val = load volatile half, ptr addrspace(1) %a
+  %b.val = load volatile half, ptr addrspace(1) %b
+  %c.val = load volatile half, ptr addrspace(1) %c
 
   %t0.val = fmul half %a.val, %b.val
   %t1.val = fmul half %a.val, %c.val
   %r0.val = fadd half %t0.val, 10.0
   %r1.val = fadd half %t1.val, 10.0
 
-  store half %r0.val, half addrspace(1)* %r0
-  store half %r1.val, half addrspace(1)* %r1
+  store half %r0.val, ptr addrspace(1) %r0
+  store half %r1.val, ptr addrspace(1) %r1
   ret void
 }
 
