[llvm] 9b81548 - [NVPTX] Convert some tests to opaque pointers (NFC)

Nikita Popov via llvm-commits <llvm-commits@lists.llvm.org>
Mon Dec 19 03:58:09 PST 2022


Author: Nikita Popov
Date: 2022-12-19T12:57:23+01:00
New Revision: 9b81548a6847937f194bf62033f295b8385d9b42

URL: https://github.com/llvm/llvm-project/commit/9b81548a6847937f194bf62033f295b8385d9b42
DIFF: https://github.com/llvm/llvm-project/commit/9b81548a6847937f194bf62033f295b8385d9b42.diff

LOG: [NVPTX] Convert some tests to opaque pointers (NFC)
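
For context: with opaque pointers, every pointer type (i1*, i32*, and so on)
is spelled as the single type ptr, and the pointee type is carried by the
instruction itself (for example, the result type of a load). A representative
before/after pair taken from the hunks below:

    %tmp = load i1, i1* @i1_l    ; typed pointer (old)
    store i1 %tmp, i1* @i1_s

    %tmp = load i1, ptr @i1_l    ; opaque pointer (new)
    store i1 %tmp, ptr @i1_s

Only the IR spelling changes; generated code is unaffected, hence NFC.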

Added: 
    

Modified: 
    llvm/test/CodeGen/NVPTX/APIntLoadStore.ll
    llvm/test/CodeGen/NVPTX/APIntParam.ll
    llvm/test/CodeGen/NVPTX/APIntSextParam.ll
    llvm/test/CodeGen/NVPTX/APIntZextParam.ll
    llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
    llvm/test/CodeGen/NVPTX/MachineSink-call.ll
    llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
    llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
    llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll
    llvm/test/CodeGen/NVPTX/aggregate-return.ll
    llvm/test/CodeGen/NVPTX/alias.ll
    llvm/test/CodeGen/NVPTX/annotations.ll
    llvm/test/CodeGen/NVPTX/async-copy.ll
    llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
    llvm/test/CodeGen/NVPTX/atomics-sm60.ll
    llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
    llvm/test/CodeGen/NVPTX/atomics.ll
    llvm/test/CodeGen/NVPTX/b52037.ll
    llvm/test/CodeGen/NVPTX/bf16.ll
    llvm/test/CodeGen/NVPTX/branch-fold.ll
    llvm/test/CodeGen/NVPTX/bug17709.ll
    llvm/test/CodeGen/NVPTX/bug21465.ll
    llvm/test/CodeGen/NVPTX/bug22246.ll
    llvm/test/CodeGen/NVPTX/bug22322.ll
    llvm/test/CodeGen/NVPTX/bug26185-2.ll
    llvm/test/CodeGen/NVPTX/bug26185.ll
    llvm/test/CodeGen/NVPTX/bug41651.ll
    llvm/test/CodeGen/NVPTX/bug52623.ll
    llvm/test/CodeGen/NVPTX/bypass-div.ll
    llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
    llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
    llvm/test/CodeGen/NVPTX/callchain.ll
    llvm/test/CodeGen/NVPTX/calling-conv.ll
    llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
    llvm/test/CodeGen/NVPTX/ctlz.ll
    llvm/test/CodeGen/NVPTX/disable-opt.ll
    llvm/test/CodeGen/NVPTX/divrem-combine.ll
    llvm/test/CodeGen/NVPTX/extloadv.ll
    llvm/test/CodeGen/NVPTX/f16-instructions.ll
    llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
    llvm/test/CodeGen/NVPTX/fp16.ll
    llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
    llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
    llvm/test/CodeGen/NVPTX/global-ctor-empty.ll
    llvm/test/CodeGen/NVPTX/global-ctor.ll
    llvm/test/CodeGen/NVPTX/global-dtor.ll
    llvm/test/CodeGen/NVPTX/global-ordering.ll
    llvm/test/CodeGen/NVPTX/globals_lowering.ll
    llvm/test/CodeGen/NVPTX/half.ll
    llvm/test/CodeGen/NVPTX/i1-global.ll
    llvm/test/CodeGen/NVPTX/i1-param.ll
    llvm/test/CodeGen/NVPTX/i128-param.ll
    llvm/test/CodeGen/NVPTX/i128-retval.ll
    llvm/test/CodeGen/NVPTX/i8-param.ll
    llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll
    llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
    llvm/test/CodeGen/NVPTX/intrinsics.ll
    llvm/test/CodeGen/NVPTX/isspacep.ll
    llvm/test/CodeGen/NVPTX/ld-generic.ll
    llvm/test/CodeGen/NVPTX/ldg-invariant.ll
    llvm/test/CodeGen/NVPTX/ldparam-v4.ll
    llvm/test/CodeGen/NVPTX/ldu-i8.ll
    llvm/test/CodeGen/NVPTX/ldu-ldg.ll
    llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
    llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
    llvm/test/CodeGen/NVPTX/load-sext-i1.ll
    llvm/test/CodeGen/NVPTX/load-store.ll
    llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
    llvm/test/CodeGen/NVPTX/local-stack-frame.ll
    llvm/test/CodeGen/NVPTX/loop-vectorize.ll
    llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
    llvm/test/CodeGen/NVPTX/lower-alloca.ll
    llvm/test/CodeGen/NVPTX/lower-args.ll
    llvm/test/CodeGen/NVPTX/lower-byval-args.ll
    llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
    llvm/test/CodeGen/NVPTX/machine-sink.ll
    llvm/test/CodeGen/NVPTX/managed.ll
    llvm/test/CodeGen/NVPTX/mbarrier.ll
    llvm/test/CodeGen/NVPTX/minmax-negative.ll
    llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
    llvm/test/CodeGen/NVPTX/no-extra-parens.ll
    llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
    llvm/test/CodeGen/NVPTX/nofunc.ll
    llvm/test/CodeGen/NVPTX/nounroll.ll
    llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
    llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
    llvm/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
    llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
    llvm/test/CodeGen/NVPTX/packed-aggr.ll
    llvm/test/CodeGen/NVPTX/param-align.ll
    llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
    llvm/test/CodeGen/NVPTX/pr16278.ll
    llvm/test/CodeGen/NVPTX/pr17529.ll
    llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll
    llvm/test/CodeGen/NVPTX/refl1.ll
    llvm/test/CodeGen/NVPTX/reg-copy.ll
    llvm/test/CodeGen/NVPTX/reg-types.ll
    llvm/test/CodeGen/NVPTX/sched1.ll
    llvm/test/CodeGen/NVPTX/sched2.ll
    llvm/test/CodeGen/NVPTX/sext-in-reg.ll
    llvm/test/CodeGen/NVPTX/shfl.ll
    llvm/test/CodeGen/NVPTX/shift-parts.ll
    llvm/test/CodeGen/NVPTX/simple-call.ll
    llvm/test/CodeGen/NVPTX/st-generic.ll
    llvm/test/CodeGen/NVPTX/store-retval.ll
    llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
    llvm/test/CodeGen/NVPTX/surf-read.ll
    llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
    llvm/test/CodeGen/NVPTX/surf-write.ll
    llvm/test/CodeGen/NVPTX/symbol-naming.ll
    llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
    llvm/test/CodeGen/NVPTX/tex-read.ll
    llvm/test/CodeGen/NVPTX/texsurf-queries.ll
    llvm/test/CodeGen/NVPTX/tuple-literal.ll
    llvm/test/CodeGen/NVPTX/vaargs.ll
    llvm/test/CodeGen/NVPTX/vec8.ll
    llvm/test/CodeGen/NVPTX/vector-compare.ll
    llvm/test/CodeGen/NVPTX/vector-loads.ll
    llvm/test/CodeGen/NVPTX/vector-select.ll
    llvm/test/CodeGen/NVPTX/vector-stores.ll
    llvm/test/CodeGen/NVPTX/vectorize-misaligned.ll
    llvm/test/CodeGen/NVPTX/weak-global.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/NVPTX/APIntLoadStore.ll b/llvm/test/CodeGen/NVPTX/APIntLoadStore.ll
index 66ed45579f014..aaa4109c21025 100644
--- a/llvm/test/CodeGen/NVPTX/APIntLoadStore.ll
+++ b/llvm/test/CodeGen/NVPTX/APIntLoadStore.ll
@@ -1,515 +1,515 @@
 ; RUN: llc -march=nvptx  < %s > %t
 ; RUN: llc -march=nvptx64  < %s > %t
 
-@i1_l = external global i1		; <i1*> [#uses=1]
-@i1_s = external global i1		; <i1*> [#uses=1]
-@i2_l = external global i2		; <i2*> [#uses=1]
-@i2_s = external global i2		; <i2*> [#uses=1]
-@i3_l = external global i3		; <i3*> [#uses=1]
-@i3_s = external global i3		; <i3*> [#uses=1]
-@i4_l = external global i4		; <i4*> [#uses=1]
-@i4_s = external global i4		; <i4*> [#uses=1]
-@i5_l = external global i5		; <i5*> [#uses=1]
-@i5_s = external global i5		; <i5*> [#uses=1]
-@i6_l = external global i6		; <i6*> [#uses=1]
-@i6_s = external global i6		; <i6*> [#uses=1]
-@i7_l = external global i7		; <i7*> [#uses=1]
-@i7_s = external global i7		; <i7*> [#uses=1]
-@i8_l = external global i8		; <i8*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-@i9_l = external global i9		; <i9*> [#uses=1]
-@i9_s = external global i9		; <i9*> [#uses=1]
-@i10_l = external global i10		; <i10*> [#uses=1]
-@i10_s = external global i10		; <i10*> [#uses=1]
-@i11_l = external global i11		; <i11*> [#uses=1]
-@i11_s = external global i11		; <i11*> [#uses=1]
-@i12_l = external global i12		; <i12*> [#uses=1]
-@i12_s = external global i12		; <i12*> [#uses=1]
-@i13_l = external global i13		; <i13*> [#uses=1]
-@i13_s = external global i13		; <i13*> [#uses=1]
-@i14_l = external global i14		; <i14*> [#uses=1]
-@i14_s = external global i14		; <i14*> [#uses=1]
-@i15_l = external global i15		; <i15*> [#uses=1]
-@i15_s = external global i15		; <i15*> [#uses=1]
-@i16_l = external global i16		; <i16*> [#uses=1]
-@i16_s = external global i16		; <i16*> [#uses=1]
-@i17_l = external global i17		; <i17*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-@i18_l = external global i18		; <i18*> [#uses=1]
-@i18_s = external global i18		; <i18*> [#uses=1]
-@i19_l = external global i19		; <i19*> [#uses=1]
-@i19_s = external global i19		; <i19*> [#uses=1]
-@i20_l = external global i20		; <i20*> [#uses=1]
-@i20_s = external global i20		; <i20*> [#uses=1]
-@i21_l = external global i21		; <i21*> [#uses=1]
-@i21_s = external global i21		; <i21*> [#uses=1]
-@i22_l = external global i22		; <i22*> [#uses=1]
-@i22_s = external global i22		; <i22*> [#uses=1]
-@i23_l = external global i23		; <i23*> [#uses=1]
-@i23_s = external global i23		; <i23*> [#uses=1]
-@i24_l = external global i24		; <i24*> [#uses=1]
-@i24_s = external global i24		; <i24*> [#uses=1]
-@i25_l = external global i25		; <i25*> [#uses=1]
-@i25_s = external global i25		; <i25*> [#uses=1]
-@i26_l = external global i26		; <i26*> [#uses=1]
-@i26_s = external global i26		; <i26*> [#uses=1]
-@i27_l = external global i27		; <i27*> [#uses=1]
-@i27_s = external global i27		; <i27*> [#uses=1]
-@i28_l = external global i28		; <i28*> [#uses=1]
-@i28_s = external global i28		; <i28*> [#uses=1]
-@i29_l = external global i29		; <i29*> [#uses=1]
-@i29_s = external global i29		; <i29*> [#uses=1]
-@i30_l = external global i30		; <i30*> [#uses=1]
-@i30_s = external global i30		; <i30*> [#uses=1]
-@i31_l = external global i31		; <i31*> [#uses=1]
-@i31_s = external global i31		; <i31*> [#uses=1]
-@i32_l = external global i32		; <i32*> [#uses=1]
-@i32_s = external global i32		; <i32*> [#uses=1]
-@i33_l = external global i33		; <i33*> [#uses=1]
-@i33_s = external global i33		; <i33*> [#uses=1]
-@i34_l = external global i34		; <i34*> [#uses=1]
-@i34_s = external global i34		; <i34*> [#uses=1]
-@i35_l = external global i35		; <i35*> [#uses=1]
-@i35_s = external global i35		; <i35*> [#uses=1]
-@i36_l = external global i36		; <i36*> [#uses=1]
-@i36_s = external global i36		; <i36*> [#uses=1]
-@i37_l = external global i37		; <i37*> [#uses=1]
-@i37_s = external global i37		; <i37*> [#uses=1]
-@i38_l = external global i38		; <i38*> [#uses=1]
-@i38_s = external global i38		; <i38*> [#uses=1]
-@i39_l = external global i39		; <i39*> [#uses=1]
-@i39_s = external global i39		; <i39*> [#uses=1]
-@i40_l = external global i40		; <i40*> [#uses=1]
-@i40_s = external global i40		; <i40*> [#uses=1]
-@i41_l = external global i41		; <i41*> [#uses=1]
-@i41_s = external global i41		; <i41*> [#uses=1]
-@i42_l = external global i42		; <i42*> [#uses=1]
-@i42_s = external global i42		; <i42*> [#uses=1]
-@i43_l = external global i43		; <i43*> [#uses=1]
-@i43_s = external global i43		; <i43*> [#uses=1]
-@i44_l = external global i44		; <i44*> [#uses=1]
-@i44_s = external global i44		; <i44*> [#uses=1]
-@i45_l = external global i45		; <i45*> [#uses=1]
-@i45_s = external global i45		; <i45*> [#uses=1]
-@i46_l = external global i46		; <i46*> [#uses=1]
-@i46_s = external global i46		; <i46*> [#uses=1]
-@i47_l = external global i47		; <i47*> [#uses=1]
-@i47_s = external global i47		; <i47*> [#uses=1]
-@i48_l = external global i48		; <i48*> [#uses=1]
-@i48_s = external global i48		; <i48*> [#uses=1]
-@i49_l = external global i49		; <i49*> [#uses=1]
-@i49_s = external global i49		; <i49*> [#uses=1]
-@i50_l = external global i50		; <i50*> [#uses=1]
-@i50_s = external global i50		; <i50*> [#uses=1]
-@i51_l = external global i51		; <i51*> [#uses=1]
-@i51_s = external global i51		; <i51*> [#uses=1]
-@i52_l = external global i52		; <i52*> [#uses=1]
-@i52_s = external global i52		; <i52*> [#uses=1]
-@i53_l = external global i53		; <i53*> [#uses=1]
-@i53_s = external global i53		; <i53*> [#uses=1]
-@i54_l = external global i54		; <i54*> [#uses=1]
-@i54_s = external global i54		; <i54*> [#uses=1]
-@i55_l = external global i55		; <i55*> [#uses=1]
-@i55_s = external global i55		; <i55*> [#uses=1]
-@i56_l = external global i56		; <i56*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-@i57_l = external global i57		; <i57*> [#uses=1]
-@i57_s = external global i57		; <i57*> [#uses=1]
-@i58_l = external global i58		; <i58*> [#uses=1]
-@i58_s = external global i58		; <i58*> [#uses=1]
-@i59_l = external global i59		; <i59*> [#uses=1]
-@i59_s = external global i59		; <i59*> [#uses=1]
-@i60_l = external global i60		; <i60*> [#uses=1]
-@i60_s = external global i60		; <i60*> [#uses=1]
-@i61_l = external global i61		; <i61*> [#uses=1]
-@i61_s = external global i61		; <i61*> [#uses=1]
-@i62_l = external global i62		; <i62*> [#uses=1]
-@i62_s = external global i62		; <i62*> [#uses=1]
-@i63_l = external global i63		; <i63*> [#uses=1]
-@i63_s = external global i63		; <i63*> [#uses=1]
-@i64_l = external global i64		; <i64*> [#uses=1]
-@i64_s = external global i64		; <i64*> [#uses=1]
+@i1_l = external global i1		; <ptr> [#uses=1]
+@i1_s = external global i1		; <ptr> [#uses=1]
+@i2_l = external global i2		; <ptr> [#uses=1]
+@i2_s = external global i2		; <ptr> [#uses=1]
+@i3_l = external global i3		; <ptr> [#uses=1]
+@i3_s = external global i3		; <ptr> [#uses=1]
+@i4_l = external global i4		; <ptr> [#uses=1]
+@i4_s = external global i4		; <ptr> [#uses=1]
+@i5_l = external global i5		; <ptr> [#uses=1]
+@i5_s = external global i5		; <ptr> [#uses=1]
+@i6_l = external global i6		; <ptr> [#uses=1]
+@i6_s = external global i6		; <ptr> [#uses=1]
+@i7_l = external global i7		; <ptr> [#uses=1]
+@i7_s = external global i7		; <ptr> [#uses=1]
+@i8_l = external global i8		; <ptr> [#uses=1]
+@i8_s = external global i8		; <ptr> [#uses=1]
+@i9_l = external global i9		; <ptr> [#uses=1]
+@i9_s = external global i9		; <ptr> [#uses=1]
+@i10_l = external global i10		; <ptr> [#uses=1]
+@i10_s = external global i10		; <ptr> [#uses=1]
+@i11_l = external global i11		; <ptr> [#uses=1]
+@i11_s = external global i11		; <ptr> [#uses=1]
+@i12_l = external global i12		; <ptr> [#uses=1]
+@i12_s = external global i12		; <ptr> [#uses=1]
+@i13_l = external global i13		; <ptr> [#uses=1]
+@i13_s = external global i13		; <ptr> [#uses=1]
+@i14_l = external global i14		; <ptr> [#uses=1]
+@i14_s = external global i14		; <ptr> [#uses=1]
+@i15_l = external global i15		; <ptr> [#uses=1]
+@i15_s = external global i15		; <ptr> [#uses=1]
+@i16_l = external global i16		; <ptr> [#uses=1]
+@i16_s = external global i16		; <ptr> [#uses=1]
+@i17_l = external global i17		; <ptr> [#uses=1]
+@i17_s = external global i17		; <ptr> [#uses=1]
+@i18_l = external global i18		; <ptr> [#uses=1]
+@i18_s = external global i18		; <ptr> [#uses=1]
+@i19_l = external global i19		; <ptr> [#uses=1]
+@i19_s = external global i19		; <ptr> [#uses=1]
+@i20_l = external global i20		; <ptr> [#uses=1]
+@i20_s = external global i20		; <ptr> [#uses=1]
+@i21_l = external global i21		; <ptr> [#uses=1]
+@i21_s = external global i21		; <ptr> [#uses=1]
+@i22_l = external global i22		; <ptr> [#uses=1]
+@i22_s = external global i22		; <ptr> [#uses=1]
+@i23_l = external global i23		; <ptr> [#uses=1]
+@i23_s = external global i23		; <ptr> [#uses=1]
+@i24_l = external global i24		; <ptr> [#uses=1]
+@i24_s = external global i24		; <ptr> [#uses=1]
+@i25_l = external global i25		; <ptr> [#uses=1]
+@i25_s = external global i25		; <ptr> [#uses=1]
+@i26_l = external global i26		; <ptr> [#uses=1]
+@i26_s = external global i26		; <ptr> [#uses=1]
+@i27_l = external global i27		; <ptr> [#uses=1]
+@i27_s = external global i27		; <ptr> [#uses=1]
+@i28_l = external global i28		; <ptr> [#uses=1]
+@i28_s = external global i28		; <ptr> [#uses=1]
+@i29_l = external global i29		; <ptr> [#uses=1]
+@i29_s = external global i29		; <ptr> [#uses=1]
+@i30_l = external global i30		; <ptr> [#uses=1]
+@i30_s = external global i30		; <ptr> [#uses=1]
+@i31_l = external global i31		; <ptr> [#uses=1]
+@i31_s = external global i31		; <ptr> [#uses=1]
+@i32_l = external global i32		; <ptr> [#uses=1]
+@i32_s = external global i32		; <ptr> [#uses=1]
+@i33_l = external global i33		; <ptr> [#uses=1]
+@i33_s = external global i33		; <ptr> [#uses=1]
+@i34_l = external global i34		; <ptr> [#uses=1]
+@i34_s = external global i34		; <ptr> [#uses=1]
+@i35_l = external global i35		; <ptr> [#uses=1]
+@i35_s = external global i35		; <ptr> [#uses=1]
+@i36_l = external global i36		; <ptr> [#uses=1]
+@i36_s = external global i36		; <ptr> [#uses=1]
+@i37_l = external global i37		; <ptr> [#uses=1]
+@i37_s = external global i37		; <ptr> [#uses=1]
+@i38_l = external global i38		; <ptr> [#uses=1]
+@i38_s = external global i38		; <ptr> [#uses=1]
+@i39_l = external global i39		; <ptr> [#uses=1]
+@i39_s = external global i39		; <ptr> [#uses=1]
+@i40_l = external global i40		; <ptr> [#uses=1]
+@i40_s = external global i40		; <ptr> [#uses=1]
+@i41_l = external global i41		; <ptr> [#uses=1]
+@i41_s = external global i41		; <ptr> [#uses=1]
+@i42_l = external global i42		; <ptr> [#uses=1]
+@i42_s = external global i42		; <ptr> [#uses=1]
+@i43_l = external global i43		; <ptr> [#uses=1]
+@i43_s = external global i43		; <ptr> [#uses=1]
+@i44_l = external global i44		; <ptr> [#uses=1]
+@i44_s = external global i44		; <ptr> [#uses=1]
+@i45_l = external global i45		; <ptr> [#uses=1]
+@i45_s = external global i45		; <ptr> [#uses=1]
+@i46_l = external global i46		; <ptr> [#uses=1]
+@i46_s = external global i46		; <ptr> [#uses=1]
+@i47_l = external global i47		; <ptr> [#uses=1]
+@i47_s = external global i47		; <ptr> [#uses=1]
+@i48_l = external global i48		; <ptr> [#uses=1]
+@i48_s = external global i48		; <ptr> [#uses=1]
+@i49_l = external global i49		; <ptr> [#uses=1]
+@i49_s = external global i49		; <ptr> [#uses=1]
+@i50_l = external global i50		; <ptr> [#uses=1]
+@i50_s = external global i50		; <ptr> [#uses=1]
+@i51_l = external global i51		; <ptr> [#uses=1]
+@i51_s = external global i51		; <ptr> [#uses=1]
+@i52_l = external global i52		; <ptr> [#uses=1]
+@i52_s = external global i52		; <ptr> [#uses=1]
+@i53_l = external global i53		; <ptr> [#uses=1]
+@i53_s = external global i53		; <ptr> [#uses=1]
+@i54_l = external global i54		; <ptr> [#uses=1]
+@i54_s = external global i54		; <ptr> [#uses=1]
+@i55_l = external global i55		; <ptr> [#uses=1]
+@i55_s = external global i55		; <ptr> [#uses=1]
+@i56_l = external global i56		; <ptr> [#uses=1]
+@i56_s = external global i56		; <ptr> [#uses=1]
+@i57_l = external global i57		; <ptr> [#uses=1]
+@i57_s = external global i57		; <ptr> [#uses=1]
+@i58_l = external global i58		; <ptr> [#uses=1]
+@i58_s = external global i58		; <ptr> [#uses=1]
+@i59_l = external global i59		; <ptr> [#uses=1]
+@i59_s = external global i59		; <ptr> [#uses=1]
+@i60_l = external global i60		; <ptr> [#uses=1]
+@i60_s = external global i60		; <ptr> [#uses=1]
+@i61_l = external global i61		; <ptr> [#uses=1]
+@i61_s = external global i61		; <ptr> [#uses=1]
+@i62_l = external global i62		; <ptr> [#uses=1]
+@i62_s = external global i62		; <ptr> [#uses=1]
+@i63_l = external global i63		; <ptr> [#uses=1]
+@i63_s = external global i63		; <ptr> [#uses=1]
+@i64_l = external global i64		; <ptr> [#uses=1]
+@i64_s = external global i64		; <ptr> [#uses=1]
 
 define void @i1_ls() nounwind  {
-	%tmp = load i1, i1* @i1_l		; <i1> [#uses=1]
-	store i1 %tmp, i1* @i1_s
+	%tmp = load i1, ptr @i1_l		; <i1> [#uses=1]
+	store i1 %tmp, ptr @i1_s
 	ret void
 }
 
 define void @i2_ls() nounwind  {
-	%tmp = load i2, i2* @i2_l		; <i2> [#uses=1]
-	store i2 %tmp, i2* @i2_s
+	%tmp = load i2, ptr @i2_l		; <i2> [#uses=1]
+	store i2 %tmp, ptr @i2_s
 	ret void
 }
 
 define void @i3_ls() nounwind  {
-	%tmp = load i3, i3* @i3_l		; <i3> [#uses=1]
-	store i3 %tmp, i3* @i3_s
+	%tmp = load i3, ptr @i3_l		; <i3> [#uses=1]
+	store i3 %tmp, ptr @i3_s
 	ret void
 }
 
 define void @i4_ls() nounwind  {
-	%tmp = load i4, i4* @i4_l		; <i4> [#uses=1]
-	store i4 %tmp, i4* @i4_s
+	%tmp = load i4, ptr @i4_l		; <i4> [#uses=1]
+	store i4 %tmp, ptr @i4_s
 	ret void
 }
 
 define void @i5_ls() nounwind  {
-	%tmp = load i5, i5* @i5_l		; <i5> [#uses=1]
-	store i5 %tmp, i5* @i5_s
+	%tmp = load i5, ptr @i5_l		; <i5> [#uses=1]
+	store i5 %tmp, ptr @i5_s
 	ret void
 }
 
 define void @i6_ls() nounwind  {
-	%tmp = load i6, i6* @i6_l		; <i6> [#uses=1]
-	store i6 %tmp, i6* @i6_s
+	%tmp = load i6, ptr @i6_l		; <i6> [#uses=1]
+	store i6 %tmp, ptr @i6_s
 	ret void
 }
 
 define void @i7_ls() nounwind  {
-	%tmp = load i7, i7* @i7_l		; <i7> [#uses=1]
-	store i7 %tmp, i7* @i7_s
+	%tmp = load i7, ptr @i7_l		; <i7> [#uses=1]
+	store i7 %tmp, ptr @i7_s
 	ret void
 }
 
 define void @i8_ls() nounwind  {
-	%tmp = load i8, i8* @i8_l		; <i8> [#uses=1]
-	store i8 %tmp, i8* @i8_s
+	%tmp = load i8, ptr @i8_l		; <i8> [#uses=1]
+	store i8 %tmp, ptr @i8_s
 	ret void
 }
 
 define void @i9_ls() nounwind  {
-	%tmp = load i9, i9* @i9_l		; <i9> [#uses=1]
-	store i9 %tmp, i9* @i9_s
+	%tmp = load i9, ptr @i9_l		; <i9> [#uses=1]
+	store i9 %tmp, ptr @i9_s
 	ret void
 }
 
 define void @i10_ls() nounwind  {
-	%tmp = load i10, i10* @i10_l		; <i10> [#uses=1]
-	store i10 %tmp, i10* @i10_s
+	%tmp = load i10, ptr @i10_l		; <i10> [#uses=1]
+	store i10 %tmp, ptr @i10_s
 	ret void
 }
 
 define void @i11_ls() nounwind  {
-	%tmp = load i11, i11* @i11_l		; <i11> [#uses=1]
-	store i11 %tmp, i11* @i11_s
+	%tmp = load i11, ptr @i11_l		; <i11> [#uses=1]
+	store i11 %tmp, ptr @i11_s
 	ret void
 }
 
 define void @i12_ls() nounwind  {
-	%tmp = load i12, i12* @i12_l		; <i12> [#uses=1]
-	store i12 %tmp, i12* @i12_s
+	%tmp = load i12, ptr @i12_l		; <i12> [#uses=1]
+	store i12 %tmp, ptr @i12_s
 	ret void
 }
 
 define void @i13_ls() nounwind  {
-	%tmp = load i13, i13* @i13_l		; <i13> [#uses=1]
-	store i13 %tmp, i13* @i13_s
+	%tmp = load i13, ptr @i13_l		; <i13> [#uses=1]
+	store i13 %tmp, ptr @i13_s
 	ret void
 }
 
 define void @i14_ls() nounwind  {
-	%tmp = load i14, i14* @i14_l		; <i14> [#uses=1]
-	store i14 %tmp, i14* @i14_s
+	%tmp = load i14, ptr @i14_l		; <i14> [#uses=1]
+	store i14 %tmp, ptr @i14_s
 	ret void
 }
 
 define void @i15_ls() nounwind  {
-	%tmp = load i15, i15* @i15_l		; <i15> [#uses=1]
-	store i15 %tmp, i15* @i15_s
+	%tmp = load i15, ptr @i15_l		; <i15> [#uses=1]
+	store i15 %tmp, ptr @i15_s
 	ret void
 }
 
 define void @i16_ls() nounwind  {
-	%tmp = load i16, i16* @i16_l		; <i16> [#uses=1]
-	store i16 %tmp, i16* @i16_s
+	%tmp = load i16, ptr @i16_l		; <i16> [#uses=1]
+	store i16 %tmp, ptr @i16_s
 	ret void
 }
 
 define void @i17_ls() nounwind  {
-	%tmp = load i17, i17* @i17_l		; <i17> [#uses=1]
-	store i17 %tmp, i17* @i17_s
+	%tmp = load i17, ptr @i17_l		; <i17> [#uses=1]
+	store i17 %tmp, ptr @i17_s
 	ret void
 }
 
 define void @i18_ls() nounwind  {
-	%tmp = load i18, i18* @i18_l		; <i18> [#uses=1]
-	store i18 %tmp, i18* @i18_s
+	%tmp = load i18, ptr @i18_l		; <i18> [#uses=1]
+	store i18 %tmp, ptr @i18_s
 	ret void
 }
 
 define void @i19_ls() nounwind  {
-	%tmp = load i19, i19* @i19_l		; <i19> [#uses=1]
-	store i19 %tmp, i19* @i19_s
+	%tmp = load i19, ptr @i19_l		; <i19> [#uses=1]
+	store i19 %tmp, ptr @i19_s
 	ret void
 }
 
 define void @i20_ls() nounwind  {
-	%tmp = load i20, i20* @i20_l		; <i20> [#uses=1]
-	store i20 %tmp, i20* @i20_s
+	%tmp = load i20, ptr @i20_l		; <i20> [#uses=1]
+	store i20 %tmp, ptr @i20_s
 	ret void
 }
 
 define void @i21_ls() nounwind  {
-	%tmp = load i21, i21* @i21_l		; <i21> [#uses=1]
-	store i21 %tmp, i21* @i21_s
+	%tmp = load i21, ptr @i21_l		; <i21> [#uses=1]
+	store i21 %tmp, ptr @i21_s
 	ret void
 }
 
 define void @i22_ls() nounwind  {
-	%tmp = load i22, i22* @i22_l		; <i22> [#uses=1]
-	store i22 %tmp, i22* @i22_s
+	%tmp = load i22, ptr @i22_l		; <i22> [#uses=1]
+	store i22 %tmp, ptr @i22_s
 	ret void
 }
 
 define void @i23_ls() nounwind  {
-	%tmp = load i23, i23* @i23_l		; <i23> [#uses=1]
-	store i23 %tmp, i23* @i23_s
+	%tmp = load i23, ptr @i23_l		; <i23> [#uses=1]
+	store i23 %tmp, ptr @i23_s
 	ret void
 }
 
 define void @i24_ls() nounwind  {
-	%tmp = load i24, i24* @i24_l		; <i24> [#uses=1]
-	store i24 %tmp, i24* @i24_s
+	%tmp = load i24, ptr @i24_l		; <i24> [#uses=1]
+	store i24 %tmp, ptr @i24_s
 	ret void
 }
 
 define void @i25_ls() nounwind  {
-	%tmp = load i25, i25* @i25_l		; <i25> [#uses=1]
-	store i25 %tmp, i25* @i25_s
+	%tmp = load i25, ptr @i25_l		; <i25> [#uses=1]
+	store i25 %tmp, ptr @i25_s
 	ret void
 }
 
 define void @i26_ls() nounwind  {
-	%tmp = load i26, i26* @i26_l		; <i26> [#uses=1]
-	store i26 %tmp, i26* @i26_s
+	%tmp = load i26, ptr @i26_l		; <i26> [#uses=1]
+	store i26 %tmp, ptr @i26_s
 	ret void
 }
 
 define void @i27_ls() nounwind  {
-	%tmp = load i27, i27* @i27_l		; <i27> [#uses=1]
-	store i27 %tmp, i27* @i27_s
+	%tmp = load i27, ptr @i27_l		; <i27> [#uses=1]
+	store i27 %tmp, ptr @i27_s
 	ret void
 }
 
 define void @i28_ls() nounwind  {
-	%tmp = load i28, i28* @i28_l		; <i28> [#uses=1]
-	store i28 %tmp, i28* @i28_s
+	%tmp = load i28, ptr @i28_l		; <i28> [#uses=1]
+	store i28 %tmp, ptr @i28_s
 	ret void
 }
 
 define void @i29_ls() nounwind  {
-	%tmp = load i29, i29* @i29_l		; <i29> [#uses=1]
-	store i29 %tmp, i29* @i29_s
+	%tmp = load i29, ptr @i29_l		; <i29> [#uses=1]
+	store i29 %tmp, ptr @i29_s
 	ret void
 }
 
 define void @i30_ls() nounwind  {
-	%tmp = load i30, i30* @i30_l		; <i30> [#uses=1]
-	store i30 %tmp, i30* @i30_s
+	%tmp = load i30, ptr @i30_l		; <i30> [#uses=1]
+	store i30 %tmp, ptr @i30_s
 	ret void
 }
 
 define void @i31_ls() nounwind  {
-	%tmp = load i31, i31* @i31_l		; <i31> [#uses=1]
-	store i31 %tmp, i31* @i31_s
+	%tmp = load i31, ptr @i31_l		; <i31> [#uses=1]
+	store i31 %tmp, ptr @i31_s
 	ret void
 }
 
 define void @i32_ls() nounwind  {
-	%tmp = load i32, i32* @i32_l		; <i32> [#uses=1]
-	store i32 %tmp, i32* @i32_s
+	%tmp = load i32, ptr @i32_l		; <i32> [#uses=1]
+	store i32 %tmp, ptr @i32_s
 	ret void
 }
 
 define void @i33_ls() nounwind  {
-	%tmp = load i33, i33* @i33_l		; <i33> [#uses=1]
-	store i33 %tmp, i33* @i33_s
+	%tmp = load i33, ptr @i33_l		; <i33> [#uses=1]
+	store i33 %tmp, ptr @i33_s
 	ret void
 }
 
 define void @i34_ls() nounwind  {
-	%tmp = load i34, i34* @i34_l		; <i34> [#uses=1]
-	store i34 %tmp, i34* @i34_s
+	%tmp = load i34, ptr @i34_l		; <i34> [#uses=1]
+	store i34 %tmp, ptr @i34_s
 	ret void
 }
 
 define void @i35_ls() nounwind  {
-	%tmp = load i35, i35* @i35_l		; <i35> [#uses=1]
-	store i35 %tmp, i35* @i35_s
+	%tmp = load i35, ptr @i35_l		; <i35> [#uses=1]
+	store i35 %tmp, ptr @i35_s
 	ret void
 }
 
 define void @i36_ls() nounwind  {
-	%tmp = load i36, i36* @i36_l		; <i36> [#uses=1]
-	store i36 %tmp, i36* @i36_s
+	%tmp = load i36, ptr @i36_l		; <i36> [#uses=1]
+	store i36 %tmp, ptr @i36_s
 	ret void
 }
 
 define void @i37_ls() nounwind  {
-	%tmp = load i37, i37* @i37_l		; <i37> [#uses=1]
-	store i37 %tmp, i37* @i37_s
+	%tmp = load i37, ptr @i37_l		; <i37> [#uses=1]
+	store i37 %tmp, ptr @i37_s
 	ret void
 }
 
 define void @i38_ls() nounwind  {
-	%tmp = load i38, i38* @i38_l		; <i38> [#uses=1]
-	store i38 %tmp, i38* @i38_s
+	%tmp = load i38, ptr @i38_l		; <i38> [#uses=1]
+	store i38 %tmp, ptr @i38_s
 	ret void
 }
 
 define void @i39_ls() nounwind  {
-	%tmp = load i39, i39* @i39_l		; <i39> [#uses=1]
-	store i39 %tmp, i39* @i39_s
+	%tmp = load i39, ptr @i39_l		; <i39> [#uses=1]
+	store i39 %tmp, ptr @i39_s
 	ret void
 }
 
 define void @i40_ls() nounwind  {
-	%tmp = load i40, i40* @i40_l		; <i40> [#uses=1]
-	store i40 %tmp, i40* @i40_s
+	%tmp = load i40, ptr @i40_l		; <i40> [#uses=1]
+	store i40 %tmp, ptr @i40_s
 	ret void
 }
 
 define void @i41_ls() nounwind  {
-	%tmp = load i41, i41* @i41_l		; <i41> [#uses=1]
-	store i41 %tmp, i41* @i41_s
+	%tmp = load i41, ptr @i41_l		; <i41> [#uses=1]
+	store i41 %tmp, ptr @i41_s
 	ret void
 }
 
 define void @i42_ls() nounwind  {
-	%tmp = load i42, i42* @i42_l		; <i42> [#uses=1]
-	store i42 %tmp, i42* @i42_s
+	%tmp = load i42, ptr @i42_l		; <i42> [#uses=1]
+	store i42 %tmp, ptr @i42_s
 	ret void
 }
 
 define void @i43_ls() nounwind  {
-	%tmp = load i43, i43* @i43_l		; <i43> [#uses=1]
-	store i43 %tmp, i43* @i43_s
+	%tmp = load i43, ptr @i43_l		; <i43> [#uses=1]
+	store i43 %tmp, ptr @i43_s
 	ret void
 }
 
 define void @i44_ls() nounwind  {
-	%tmp = load i44, i44* @i44_l		; <i44> [#uses=1]
-	store i44 %tmp, i44* @i44_s
+	%tmp = load i44, ptr @i44_l		; <i44> [#uses=1]
+	store i44 %tmp, ptr @i44_s
 	ret void
 }
 
 define void @i45_ls() nounwind  {
-	%tmp = load i45, i45* @i45_l		; <i45> [#uses=1]
-	store i45 %tmp, i45* @i45_s
+	%tmp = load i45, ptr @i45_l		; <i45> [#uses=1]
+	store i45 %tmp, ptr @i45_s
 	ret void
 }
 
 define void @i46_ls() nounwind  {
-	%tmp = load i46, i46* @i46_l		; <i46> [#uses=1]
-	store i46 %tmp, i46* @i46_s
+	%tmp = load i46, ptr @i46_l		; <i46> [#uses=1]
+	store i46 %tmp, ptr @i46_s
 	ret void
 }
 
 define void @i47_ls() nounwind  {
-	%tmp = load i47, i47* @i47_l		; <i47> [#uses=1]
-	store i47 %tmp, i47* @i47_s
+	%tmp = load i47, ptr @i47_l		; <i47> [#uses=1]
+	store i47 %tmp, ptr @i47_s
 	ret void
 }
 
 define void @i48_ls() nounwind  {
-	%tmp = load i48, i48* @i48_l		; <i48> [#uses=1]
-	store i48 %tmp, i48* @i48_s
+	%tmp = load i48, ptr @i48_l		; <i48> [#uses=1]
+	store i48 %tmp, ptr @i48_s
 	ret void
 }
 
 define void @i49_ls() nounwind  {
-	%tmp = load i49, i49* @i49_l		; <i49> [#uses=1]
-	store i49 %tmp, i49* @i49_s
+	%tmp = load i49, ptr @i49_l		; <i49> [#uses=1]
+	store i49 %tmp, ptr @i49_s
 	ret void
 }
 
 define void @i50_ls() nounwind  {
-	%tmp = load i50, i50* @i50_l		; <i50> [#uses=1]
-	store i50 %tmp, i50* @i50_s
+	%tmp = load i50, ptr @i50_l		; <i50> [#uses=1]
+	store i50 %tmp, ptr @i50_s
 	ret void
 }
 
 define void @i51_ls() nounwind  {
-	%tmp = load i51, i51* @i51_l		; <i51> [#uses=1]
-	store i51 %tmp, i51* @i51_s
+	%tmp = load i51, ptr @i51_l		; <i51> [#uses=1]
+	store i51 %tmp, ptr @i51_s
 	ret void
 }
 
 define void @i52_ls() nounwind  {
-	%tmp = load i52, i52* @i52_l		; <i52> [#uses=1]
-	store i52 %tmp, i52* @i52_s
+	%tmp = load i52, ptr @i52_l		; <i52> [#uses=1]
+	store i52 %tmp, ptr @i52_s
 	ret void
 }
 
 define void @i53_ls() nounwind  {
-	%tmp = load i53, i53* @i53_l		; <i53> [#uses=1]
-	store i53 %tmp, i53* @i53_s
+	%tmp = load i53, ptr @i53_l		; <i53> [#uses=1]
+	store i53 %tmp, ptr @i53_s
 	ret void
 }
 
 define void @i54_ls() nounwind  {
-	%tmp = load i54, i54* @i54_l		; <i54> [#uses=1]
-	store i54 %tmp, i54* @i54_s
+	%tmp = load i54, ptr @i54_l		; <i54> [#uses=1]
+	store i54 %tmp, ptr @i54_s
 	ret void
 }
 
 define void @i55_ls() nounwind  {
-	%tmp = load i55, i55* @i55_l		; <i55> [#uses=1]
-	store i55 %tmp, i55* @i55_s
+	%tmp = load i55, ptr @i55_l		; <i55> [#uses=1]
+	store i55 %tmp, ptr @i55_s
 	ret void
 }
 
 define void @i56_ls() nounwind  {
-	%tmp = load i56, i56* @i56_l		; <i56> [#uses=1]
-	store i56 %tmp, i56* @i56_s
+	%tmp = load i56, ptr @i56_l		; <i56> [#uses=1]
+	store i56 %tmp, ptr @i56_s
 	ret void
 }
 
 define void @i57_ls() nounwind  {
-	%tmp = load i57, i57* @i57_l		; <i57> [#uses=1]
-	store i57 %tmp, i57* @i57_s
+	%tmp = load i57, ptr @i57_l		; <i57> [#uses=1]
+	store i57 %tmp, ptr @i57_s
 	ret void
 }
 
 define void @i58_ls() nounwind  {
-	%tmp = load i58, i58* @i58_l		; <i58> [#uses=1]
-	store i58 %tmp, i58* @i58_s
+	%tmp = load i58, ptr @i58_l		; <i58> [#uses=1]
+	store i58 %tmp, ptr @i58_s
 	ret void
 }
 
 define void @i59_ls() nounwind  {
-	%tmp = load i59, i59* @i59_l		; <i59> [#uses=1]
-	store i59 %tmp, i59* @i59_s
+	%tmp = load i59, ptr @i59_l		; <i59> [#uses=1]
+	store i59 %tmp, ptr @i59_s
 	ret void
 }
 
 define void @i60_ls() nounwind  {
-	%tmp = load i60, i60* @i60_l		; <i60> [#uses=1]
-	store i60 %tmp, i60* @i60_s
+	%tmp = load i60, ptr @i60_l		; <i60> [#uses=1]
+	store i60 %tmp, ptr @i60_s
 	ret void
 }
 
 define void @i61_ls() nounwind  {
-	%tmp = load i61, i61* @i61_l		; <i61> [#uses=1]
-	store i61 %tmp, i61* @i61_s
+	%tmp = load i61, ptr @i61_l		; <i61> [#uses=1]
+	store i61 %tmp, ptr @i61_s
 	ret void
 }
 
 define void @i62_ls() nounwind  {
-	%tmp = load i62, i62* @i62_l		; <i62> [#uses=1]
-	store i62 %tmp, i62* @i62_s
+	%tmp = load i62, ptr @i62_l		; <i62> [#uses=1]
+	store i62 %tmp, ptr @i62_s
 	ret void
 }
 
 define void @i63_ls() nounwind  {
-	%tmp = load i63, i63* @i63_l		; <i63> [#uses=1]
-	store i63 %tmp, i63* @i63_s
+	%tmp = load i63, ptr @i63_l		; <i63> [#uses=1]
+	store i63 %tmp, ptr @i63_s
 	ret void
 }
 
 define void @i64_ls() nounwind  {
-	%tmp = load i64, i64* @i64_l		; <i64> [#uses=1]
-	store i64 %tmp, i64* @i64_s
+	%tmp = load i64, ptr @i64_l		; <i64> [#uses=1]
+	store i64 %tmp, ptr @i64_s
 	ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/APIntParam.ll b/llvm/test/CodeGen/NVPTX/APIntParam.ll
index e7b5fde73dfc2..8b323b45ac640 100644
--- a/llvm/test/CodeGen/NVPTX/APIntParam.ll
+++ b/llvm/test/CodeGen/NVPTX/APIntParam.ll
@@ -1,387 +1,387 @@
 ; RUN: llc -march=nvptx  < %s > %t
 ; RUN: llc -march=nvptx64  < %s > %t
 
-@i1_s = external global i1		; <i1*> [#uses=1]
-@i2_s = external global i2		; <i2*> [#uses=1]
-@i3_s = external global i3		; <i3*> [#uses=1]
-@i4_s = external global i4		; <i4*> [#uses=1]
-@i5_s = external global i5		; <i5*> [#uses=1]
-@i6_s = external global i6		; <i6*> [#uses=1]
-@i7_s = external global i7		; <i7*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-@i9_s = external global i9		; <i9*> [#uses=1]
-@i10_s = external global i10		; <i10*> [#uses=1]
-@i11_s = external global i11		; <i11*> [#uses=1]
-@i12_s = external global i12		; <i12*> [#uses=1]
-@i13_s = external global i13		; <i13*> [#uses=1]
-@i14_s = external global i14		; <i14*> [#uses=1]
-@i15_s = external global i15		; <i15*> [#uses=1]
-@i16_s = external global i16		; <i16*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-@i18_s = external global i18		; <i18*> [#uses=1]
-@i19_s = external global i19		; <i19*> [#uses=1]
-@i20_s = external global i20		; <i20*> [#uses=1]
-@i21_s = external global i21		; <i21*> [#uses=1]
-@i22_s = external global i22		; <i22*> [#uses=1]
-@i23_s = external global i23		; <i23*> [#uses=1]
-@i24_s = external global i24		; <i24*> [#uses=1]
-@i25_s = external global i25		; <i25*> [#uses=1]
-@i26_s = external global i26		; <i26*> [#uses=1]
-@i27_s = external global i27		; <i27*> [#uses=1]
-@i28_s = external global i28		; <i28*> [#uses=1]
-@i29_s = external global i29		; <i29*> [#uses=1]
-@i30_s = external global i30		; <i30*> [#uses=1]
-@i31_s = external global i31		; <i31*> [#uses=1]
-@i32_s = external global i32		; <i32*> [#uses=1]
-@i33_s = external global i33		; <i33*> [#uses=1]
-@i34_s = external global i34		; <i34*> [#uses=1]
-@i35_s = external global i35		; <i35*> [#uses=1]
-@i36_s = external global i36		; <i36*> [#uses=1]
-@i37_s = external global i37		; <i37*> [#uses=1]
-@i38_s = external global i38		; <i38*> [#uses=1]
-@i39_s = external global i39		; <i39*> [#uses=1]
-@i40_s = external global i40		; <i40*> [#uses=1]
-@i41_s = external global i41		; <i41*> [#uses=1]
-@i42_s = external global i42		; <i42*> [#uses=1]
-@i43_s = external global i43		; <i43*> [#uses=1]
-@i44_s = external global i44		; <i44*> [#uses=1]
-@i45_s = external global i45		; <i45*> [#uses=1]
-@i46_s = external global i46		; <i46*> [#uses=1]
-@i47_s = external global i47		; <i47*> [#uses=1]
-@i48_s = external global i48		; <i48*> [#uses=1]
-@i49_s = external global i49		; <i49*> [#uses=1]
-@i50_s = external global i50		; <i50*> [#uses=1]
-@i51_s = external global i51		; <i51*> [#uses=1]
-@i52_s = external global i52		; <i52*> [#uses=1]
-@i53_s = external global i53		; <i53*> [#uses=1]
-@i54_s = external global i54		; <i54*> [#uses=1]
-@i55_s = external global i55		; <i55*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-@i57_s = external global i57		; <i57*> [#uses=1]
-@i58_s = external global i58		; <i58*> [#uses=1]
-@i59_s = external global i59		; <i59*> [#uses=1]
-@i60_s = external global i60		; <i60*> [#uses=1]
-@i61_s = external global i61		; <i61*> [#uses=1]
-@i62_s = external global i62		; <i62*> [#uses=1]
-@i63_s = external global i63		; <i63*> [#uses=1]
-@i64_s = external global i64		; <i64*> [#uses=1]
+@i1_s = external global i1		; <ptr> [#uses=1]
+@i2_s = external global i2		; <ptr> [#uses=1]
+@i3_s = external global i3		; <ptr> [#uses=1]
+@i4_s = external global i4		; <ptr> [#uses=1]
+@i5_s = external global i5		; <ptr> [#uses=1]
+@i6_s = external global i6		; <ptr> [#uses=1]
+@i7_s = external global i7		; <ptr> [#uses=1]
+@i8_s = external global i8		; <ptr> [#uses=1]
+@i9_s = external global i9		; <ptr> [#uses=1]
+@i10_s = external global i10		; <ptr> [#uses=1]
+@i11_s = external global i11		; <ptr> [#uses=1]
+@i12_s = external global i12		; <ptr> [#uses=1]
+@i13_s = external global i13		; <ptr> [#uses=1]
+@i14_s = external global i14		; <ptr> [#uses=1]
+@i15_s = external global i15		; <ptr> [#uses=1]
+@i16_s = external global i16		; <ptr> [#uses=1]
+@i17_s = external global i17		; <ptr> [#uses=1]
+@i18_s = external global i18		; <ptr> [#uses=1]
+@i19_s = external global i19		; <ptr> [#uses=1]
+@i20_s = external global i20		; <ptr> [#uses=1]
+@i21_s = external global i21		; <ptr> [#uses=1]
+@i22_s = external global i22		; <ptr> [#uses=1]
+@i23_s = external global i23		; <ptr> [#uses=1]
+@i24_s = external global i24		; <ptr> [#uses=1]
+@i25_s = external global i25		; <ptr> [#uses=1]
+@i26_s = external global i26		; <ptr> [#uses=1]
+@i27_s = external global i27		; <ptr> [#uses=1]
+@i28_s = external global i28		; <ptr> [#uses=1]
+@i29_s = external global i29		; <ptr> [#uses=1]
+@i30_s = external global i30		; <ptr> [#uses=1]
+@i31_s = external global i31		; <ptr> [#uses=1]
+@i32_s = external global i32		; <ptr> [#uses=1]
+@i33_s = external global i33		; <ptr> [#uses=1]
+@i34_s = external global i34		; <ptr> [#uses=1]
+@i35_s = external global i35		; <ptr> [#uses=1]
+@i36_s = external global i36		; <ptr> [#uses=1]
+@i37_s = external global i37		; <ptr> [#uses=1]
+@i38_s = external global i38		; <ptr> [#uses=1]
+@i39_s = external global i39		; <ptr> [#uses=1]
+@i40_s = external global i40		; <ptr> [#uses=1]
+@i41_s = external global i41		; <ptr> [#uses=1]
+@i42_s = external global i42		; <ptr> [#uses=1]
+@i43_s = external global i43		; <ptr> [#uses=1]
+@i44_s = external global i44		; <ptr> [#uses=1]
+@i45_s = external global i45		; <ptr> [#uses=1]
+@i46_s = external global i46		; <ptr> [#uses=1]
+@i47_s = external global i47		; <ptr> [#uses=1]
+@i48_s = external global i48		; <ptr> [#uses=1]
+@i49_s = external global i49		; <ptr> [#uses=1]
+@i50_s = external global i50		; <ptr> [#uses=1]
+@i51_s = external global i51		; <ptr> [#uses=1]
+@i52_s = external global i52		; <ptr> [#uses=1]
+@i53_s = external global i53		; <ptr> [#uses=1]
+@i54_s = external global i54		; <ptr> [#uses=1]
+@i55_s = external global i55		; <ptr> [#uses=1]
+@i56_s = external global i56		; <ptr> [#uses=1]
+@i57_s = external global i57		; <ptr> [#uses=1]
+@i58_s = external global i58		; <ptr> [#uses=1]
+@i59_s = external global i59		; <ptr> [#uses=1]
+@i60_s = external global i60		; <ptr> [#uses=1]
+@i61_s = external global i61		; <ptr> [#uses=1]
+@i62_s = external global i62		; <ptr> [#uses=1]
+@i63_s = external global i63		; <ptr> [#uses=1]
+@i64_s = external global i64		; <ptr> [#uses=1]
 
 define void @i1_ls(i1 %x) nounwind  {
-	store i1 %x, i1* @i1_s
+	store i1 %x, ptr @i1_s
 	ret void
 }
 
 define void @i2_ls(i2 %x) nounwind  {
-	store i2 %x, i2* @i2_s
+	store i2 %x, ptr @i2_s
 	ret void
 }
 
 define void @i3_ls(i3 %x) nounwind  {
-	store i3 %x, i3* @i3_s
+	store i3 %x, ptr @i3_s
 	ret void
 }
 
 define void @i4_ls(i4 %x) nounwind  {
-	store i4 %x, i4* @i4_s
+	store i4 %x, ptr @i4_s
 	ret void
 }
 
 define void @i5_ls(i5 %x) nounwind  {
-	store i5 %x, i5* @i5_s
+	store i5 %x, ptr @i5_s
 	ret void
 }
 
 define void @i6_ls(i6 %x) nounwind  {
-	store i6 %x, i6* @i6_s
+	store i6 %x, ptr @i6_s
 	ret void
 }
 
 define void @i7_ls(i7 %x) nounwind  {
-	store i7 %x, i7* @i7_s
+	store i7 %x, ptr @i7_s
 	ret void
 }
 
 define void @i8_ls(i8 %x) nounwind  {
-	store i8 %x, i8* @i8_s
+	store i8 %x, ptr @i8_s
 	ret void
 }
 
 define void @i9_ls(i9 %x) nounwind  {
-	store i9 %x, i9* @i9_s
+	store i9 %x, ptr @i9_s
 	ret void
 }
 
 define void @i10_ls(i10 %x) nounwind  {
-	store i10 %x, i10* @i10_s
+	store i10 %x, ptr @i10_s
 	ret void
 }
 
 define void @i11_ls(i11 %x) nounwind  {
-	store i11 %x, i11* @i11_s
+	store i11 %x, ptr @i11_s
 	ret void
 }
 
 define void @i12_ls(i12 %x) nounwind  {
-	store i12 %x, i12* @i12_s
+	store i12 %x, ptr @i12_s
 	ret void
 }
 
 define void @i13_ls(i13 %x) nounwind  {
-	store i13 %x, i13* @i13_s
+	store i13 %x, ptr @i13_s
 	ret void
 }
 
 define void @i14_ls(i14 %x) nounwind  {
-	store i14 %x, i14* @i14_s
+	store i14 %x, ptr @i14_s
 	ret void
 }
 
 define void @i15_ls(i15 %x) nounwind  {
-	store i15 %x, i15* @i15_s
+	store i15 %x, ptr @i15_s
 	ret void
 }
 
 define void @i16_ls(i16 %x) nounwind  {
-	store i16 %x, i16* @i16_s
+	store i16 %x, ptr @i16_s
 	ret void
 }
 
 define void @i17_ls(i17 %x) nounwind  {
-	store i17 %x, i17* @i17_s
+	store i17 %x, ptr @i17_s
 	ret void
 }
 
 define void @i18_ls(i18 %x) nounwind  {
-	store i18 %x, i18* @i18_s
+	store i18 %x, ptr @i18_s
 	ret void
 }
 
 define void @i19_ls(i19 %x) nounwind  {
-	store i19 %x, i19* @i19_s
+	store i19 %x, ptr @i19_s
 	ret void
 }
 
 define void @i20_ls(i20 %x) nounwind  {
-	store i20 %x, i20* @i20_s
+	store i20 %x, ptr @i20_s
 	ret void
 }
 
 define void @i21_ls(i21 %x) nounwind  {
-	store i21 %x, i21* @i21_s
+	store i21 %x, ptr @i21_s
 	ret void
 }
 
 define void @i22_ls(i22 %x) nounwind  {
-	store i22 %x, i22* @i22_s
+	store i22 %x, ptr @i22_s
 	ret void
 }
 
 define void @i23_ls(i23 %x) nounwind  {
-	store i23 %x, i23* @i23_s
+	store i23 %x, ptr @i23_s
 	ret void
 }
 
 define void @i24_ls(i24 %x) nounwind  {
-	store i24 %x, i24* @i24_s
+	store i24 %x, ptr @i24_s
 	ret void
 }
 
 define void @i25_ls(i25 %x) nounwind  {
-	store i25 %x, i25* @i25_s
+	store i25 %x, ptr @i25_s
 	ret void
 }
 
 define void @i26_ls(i26 %x) nounwind  {
-	store i26 %x, i26* @i26_s
+	store i26 %x, ptr @i26_s
 	ret void
 }
 
 define void @i27_ls(i27 %x) nounwind  {
-	store i27 %x, i27* @i27_s
+	store i27 %x, ptr @i27_s
 	ret void
 }
 
 define void @i28_ls(i28 %x) nounwind  {
-	store i28 %x, i28* @i28_s
+	store i28 %x, ptr @i28_s
 	ret void
 }
 
 define void @i29_ls(i29 %x) nounwind  {
-	store i29 %x, i29* @i29_s
+	store i29 %x, ptr @i29_s
 	ret void
 }
 
 define void @i30_ls(i30 %x) nounwind  {
-	store i30 %x, i30* @i30_s
+	store i30 %x, ptr @i30_s
 	ret void
 }
 
 define void @i31_ls(i31 %x) nounwind  {
-	store i31 %x, i31* @i31_s
+	store i31 %x, ptr @i31_s
 	ret void
 }
 
 define void @i32_ls(i32 %x) nounwind  {
-	store i32 %x, i32* @i32_s
+	store i32 %x, ptr @i32_s
 	ret void
 }
 
 define void @i33_ls(i33 %x) nounwind  {
-	store i33 %x, i33* @i33_s
+	store i33 %x, ptr @i33_s
 	ret void
 }
 
 define void @i34_ls(i34 %x) nounwind  {
-	store i34 %x, i34* @i34_s
+	store i34 %x, ptr @i34_s
 	ret void
 }
 
 define void @i35_ls(i35 %x) nounwind  {
-	store i35 %x, i35* @i35_s
+	store i35 %x, ptr @i35_s
 	ret void
 }
 
 define void @i36_ls(i36 %x) nounwind  {
-	store i36 %x, i36* @i36_s
+	store i36 %x, ptr @i36_s
 	ret void
 }
 
 define void @i37_ls(i37 %x) nounwind  {
-	store i37 %x, i37* @i37_s
+	store i37 %x, ptr @i37_s
 	ret void
 }
 
 define void @i38_ls(i38 %x) nounwind  {
-	store i38 %x, i38* @i38_s
+	store i38 %x, ptr @i38_s
 	ret void
 }
 
 define void @i39_ls(i39 %x) nounwind  {
-	store i39 %x, i39* @i39_s
+	store i39 %x, ptr @i39_s
 	ret void
 }
 
 define void @i40_ls(i40 %x) nounwind  {
-	store i40 %x, i40* @i40_s
+	store i40 %x, ptr @i40_s
 	ret void
 }
 
 define void @i41_ls(i41 %x) nounwind  {
-	store i41 %x, i41* @i41_s
+	store i41 %x, ptr @i41_s
 	ret void
 }
 
 define void @i42_ls(i42 %x) nounwind  {
-	store i42 %x, i42* @i42_s
+	store i42 %x, ptr @i42_s
 	ret void
 }
 
 define void @i43_ls(i43 %x) nounwind  {
-	store i43 %x, i43* @i43_s
+	store i43 %x, ptr @i43_s
 	ret void
 }
 
 define void @i44_ls(i44 %x) nounwind  {
-	store i44 %x, i44* @i44_s
+	store i44 %x, ptr @i44_s
 	ret void
 }
 
 define void @i45_ls(i45 %x) nounwind  {
-	store i45 %x, i45* @i45_s
+	store i45 %x, ptr @i45_s
 	ret void
 }
 
 define void @i46_ls(i46 %x) nounwind  {
-	store i46 %x, i46* @i46_s
+	store i46 %x, ptr @i46_s
 	ret void
 }
 
 define void @i47_ls(i47 %x) nounwind  {
-	store i47 %x, i47* @i47_s
+	store i47 %x, ptr @i47_s
 	ret void
 }
 
 define void @i48_ls(i48 %x) nounwind  {
-	store i48 %x, i48* @i48_s
+	store i48 %x, ptr @i48_s
 	ret void
 }
 
 define void @i49_ls(i49 %x) nounwind  {
-	store i49 %x, i49* @i49_s
+	store i49 %x, ptr @i49_s
 	ret void
 }
 
 define void @i50_ls(i50 %x) nounwind  {
-	store i50 %x, i50* @i50_s
+	store i50 %x, ptr @i50_s
 	ret void
 }
 
 define void @i51_ls(i51 %x) nounwind  {
-	store i51 %x, i51* @i51_s
+	store i51 %x, ptr @i51_s
 	ret void
 }
 
 define void @i52_ls(i52 %x) nounwind  {
-	store i52 %x, i52* @i52_s
+	store i52 %x, ptr @i52_s
 	ret void
 }
 
 define void @i53_ls(i53 %x) nounwind  {
-	store i53 %x, i53* @i53_s
+	store i53 %x, ptr @i53_s
 	ret void
 }
 
 define void @i54_ls(i54 %x) nounwind  {
-	store i54 %x, i54* @i54_s
+	store i54 %x, ptr @i54_s
 	ret void
 }
 
 define void @i55_ls(i55 %x) nounwind  {
-	store i55 %x, i55* @i55_s
+	store i55 %x, ptr @i55_s
 	ret void
 }
 
 define void @i56_ls(i56 %x) nounwind  {
-	store i56 %x, i56* @i56_s
+	store i56 %x, ptr @i56_s
 	ret void
 }
 
 define void @i57_ls(i57 %x) nounwind  {
-	store i57 %x, i57* @i57_s
+	store i57 %x, ptr @i57_s
 	ret void
 }
 
 define void @i58_ls(i58 %x) nounwind  {
-	store i58 %x, i58* @i58_s
+	store i58 %x, ptr @i58_s
 	ret void
 }
 
 define void @i59_ls(i59 %x) nounwind  {
-	store i59 %x, i59* @i59_s
+	store i59 %x, ptr @i59_s
 	ret void
 }
 
 define void @i60_ls(i60 %x) nounwind  {
-	store i60 %x, i60* @i60_s
+	store i60 %x, ptr @i60_s
 	ret void
 }
 
 define void @i61_ls(i61 %x) nounwind  {
-	store i61 %x, i61* @i61_s
+	store i61 %x, ptr @i61_s
 	ret void
 }
 
 define void @i62_ls(i62 %x) nounwind  {
-	store i62 %x, i62* @i62_s
+	store i62 %x, ptr @i62_s
 	ret void
 }
 
 define void @i63_ls(i63 %x) nounwind  {
-	store i63 %x, i63* @i63_s
+	store i63 %x, ptr @i63_s
 	ret void
 }
 
 define void @i64_ls(i64 %x) nounwind  {
-	store i64 %x, i64* @i64_s
+	store i64 %x, ptr @i64_s
 	ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/APIntSextParam.ll b/llvm/test/CodeGen/NVPTX/APIntSextParam.ll
index 4594a5c0d4c64..dce1c249ca6b0 100644
--- a/llvm/test/CodeGen/NVPTX/APIntSextParam.ll
+++ b/llvm/test/CodeGen/NVPTX/APIntSextParam.ll
@@ -1,387 +1,387 @@
 ; RUN: llc -march=nvptx  < %s > %t
 ; RUN: llc -march=nvptx64  < %s > %t
 
-@i1_s = external global i1		; <i1*> [#uses=1]
-@i2_s = external global i2		; <i2*> [#uses=1]
-@i3_s = external global i3		; <i3*> [#uses=1]
-@i4_s = external global i4		; <i4*> [#uses=1]
-@i5_s = external global i5		; <i5*> [#uses=1]
-@i6_s = external global i6		; <i6*> [#uses=1]
-@i7_s = external global i7		; <i7*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-@i9_s = external global i9		; <i9*> [#uses=1]
-@i10_s = external global i10		; <i10*> [#uses=1]
-@i11_s = external global i11		; <i11*> [#uses=1]
-@i12_s = external global i12		; <i12*> [#uses=1]
-@i13_s = external global i13		; <i13*> [#uses=1]
-@i14_s = external global i14		; <i14*> [#uses=1]
-@i15_s = external global i15		; <i15*> [#uses=1]
-@i16_s = external global i16		; <i16*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-@i18_s = external global i18		; <i18*> [#uses=1]
-@i19_s = external global i19		; <i19*> [#uses=1]
-@i20_s = external global i20		; <i20*> [#uses=1]
-@i21_s = external global i21		; <i21*> [#uses=1]
-@i22_s = external global i22		; <i22*> [#uses=1]
-@i23_s = external global i23		; <i23*> [#uses=1]
-@i24_s = external global i24		; <i24*> [#uses=1]
-@i25_s = external global i25		; <i25*> [#uses=1]
-@i26_s = external global i26		; <i26*> [#uses=1]
-@i27_s = external global i27		; <i27*> [#uses=1]
-@i28_s = external global i28		; <i28*> [#uses=1]
-@i29_s = external global i29		; <i29*> [#uses=1]
-@i30_s = external global i30		; <i30*> [#uses=1]
-@i31_s = external global i31		; <i31*> [#uses=1]
-@i32_s = external global i32		; <i32*> [#uses=1]
-@i33_s = external global i33		; <i33*> [#uses=1]
-@i34_s = external global i34		; <i34*> [#uses=1]
-@i35_s = external global i35		; <i35*> [#uses=1]
-@i36_s = external global i36		; <i36*> [#uses=1]
-@i37_s = external global i37		; <i37*> [#uses=1]
-@i38_s = external global i38		; <i38*> [#uses=1]
-@i39_s = external global i39		; <i39*> [#uses=1]
-@i40_s = external global i40		; <i40*> [#uses=1]
-@i41_s = external global i41		; <i41*> [#uses=1]
-@i42_s = external global i42		; <i42*> [#uses=1]
-@i43_s = external global i43		; <i43*> [#uses=1]
-@i44_s = external global i44		; <i44*> [#uses=1]
-@i45_s = external global i45		; <i45*> [#uses=1]
-@i46_s = external global i46		; <i46*> [#uses=1]
-@i47_s = external global i47		; <i47*> [#uses=1]
-@i48_s = external global i48		; <i48*> [#uses=1]
-@i49_s = external global i49		; <i49*> [#uses=1]
-@i50_s = external global i50		; <i50*> [#uses=1]
-@i51_s = external global i51		; <i51*> [#uses=1]
-@i52_s = external global i52		; <i52*> [#uses=1]
-@i53_s = external global i53		; <i53*> [#uses=1]
-@i54_s = external global i54		; <i54*> [#uses=1]
-@i55_s = external global i55		; <i55*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-@i57_s = external global i57		; <i57*> [#uses=1]
-@i58_s = external global i58		; <i58*> [#uses=1]
-@i59_s = external global i59		; <i59*> [#uses=1]
-@i60_s = external global i60		; <i60*> [#uses=1]
-@i61_s = external global i61		; <i61*> [#uses=1]
-@i62_s = external global i62		; <i62*> [#uses=1]
-@i63_s = external global i63		; <i63*> [#uses=1]
-@i64_s = external global i64		; <i64*> [#uses=1]
+@i1_s = external global i1		; <ptr> [#uses=1]
+@i2_s = external global i2		; <ptr> [#uses=1]
+@i3_s = external global i3		; <ptr> [#uses=1]
+@i4_s = external global i4		; <ptr> [#uses=1]
+@i5_s = external global i5		; <ptr> [#uses=1]
+@i6_s = external global i6		; <ptr> [#uses=1]
+@i7_s = external global i7		; <ptr> [#uses=1]
+@i8_s = external global i8		; <ptr> [#uses=1]
+@i9_s = external global i9		; <ptr> [#uses=1]
+@i10_s = external global i10		; <ptr> [#uses=1]
+@i11_s = external global i11		; <ptr> [#uses=1]
+@i12_s = external global i12		; <ptr> [#uses=1]
+@i13_s = external global i13		; <ptr> [#uses=1]
+@i14_s = external global i14		; <ptr> [#uses=1]
+@i15_s = external global i15		; <ptr> [#uses=1]
+@i16_s = external global i16		; <ptr> [#uses=1]
+@i17_s = external global i17		; <ptr> [#uses=1]
+@i18_s = external global i18		; <ptr> [#uses=1]
+@i19_s = external global i19		; <ptr> [#uses=1]
+@i20_s = external global i20		; <ptr> [#uses=1]
+@i21_s = external global i21		; <ptr> [#uses=1]
+@i22_s = external global i22		; <ptr> [#uses=1]
+@i23_s = external global i23		; <ptr> [#uses=1]
+@i24_s = external global i24		; <ptr> [#uses=1]
+@i25_s = external global i25		; <ptr> [#uses=1]
+@i26_s = external global i26		; <ptr> [#uses=1]
+@i27_s = external global i27		; <ptr> [#uses=1]
+@i28_s = external global i28		; <ptr> [#uses=1]
+@i29_s = external global i29		; <ptr> [#uses=1]
+@i30_s = external global i30		; <ptr> [#uses=1]
+@i31_s = external global i31		; <ptr> [#uses=1]
+@i32_s = external global i32		; <ptr> [#uses=1]
+@i33_s = external global i33		; <ptr> [#uses=1]
+@i34_s = external global i34		; <ptr> [#uses=1]
+@i35_s = external global i35		; <ptr> [#uses=1]
+@i36_s = external global i36		; <ptr> [#uses=1]
+@i37_s = external global i37		; <ptr> [#uses=1]
+@i38_s = external global i38		; <ptr> [#uses=1]
+@i39_s = external global i39		; <ptr> [#uses=1]
+@i40_s = external global i40		; <ptr> [#uses=1]
+@i41_s = external global i41		; <ptr> [#uses=1]
+@i42_s = external global i42		; <ptr> [#uses=1]
+@i43_s = external global i43		; <ptr> [#uses=1]
+@i44_s = external global i44		; <ptr> [#uses=1]
+@i45_s = external global i45		; <ptr> [#uses=1]
+@i46_s = external global i46		; <ptr> [#uses=1]
+@i47_s = external global i47		; <ptr> [#uses=1]
+@i48_s = external global i48		; <ptr> [#uses=1]
+@i49_s = external global i49		; <ptr> [#uses=1]
+@i50_s = external global i50		; <ptr> [#uses=1]
+@i51_s = external global i51		; <ptr> [#uses=1]
+@i52_s = external global i52		; <ptr> [#uses=1]
+@i53_s = external global i53		; <ptr> [#uses=1]
+@i54_s = external global i54		; <ptr> [#uses=1]
+@i55_s = external global i55		; <ptr> [#uses=1]
+@i56_s = external global i56		; <ptr> [#uses=1]
+@i57_s = external global i57		; <ptr> [#uses=1]
+@i58_s = external global i58		; <ptr> [#uses=1]
+@i59_s = external global i59		; <ptr> [#uses=1]
+@i60_s = external global i60		; <ptr> [#uses=1]
+@i61_s = external global i61		; <ptr> [#uses=1]
+@i62_s = external global i62		; <ptr> [#uses=1]
+@i63_s = external global i63		; <ptr> [#uses=1]
+@i64_s = external global i64		; <ptr> [#uses=1]
 
 define void @i1_ls(i1 signext %x) nounwind  {
-	store i1 %x, i1* @i1_s
+	store i1 %x, ptr @i1_s
 	ret void
 }
 
 define void @i2_ls(i2 signext %x) nounwind  {
-	store i2 %x, i2* @i2_s
+	store i2 %x, ptr @i2_s
 	ret void
 }
 
 define void @i3_ls(i3 signext %x) nounwind  {
-	store i3 %x, i3* @i3_s
+	store i3 %x, ptr @i3_s
 	ret void
 }
 
 define void @i4_ls(i4 signext %x) nounwind  {
-	store i4 %x, i4* @i4_s
+	store i4 %x, ptr @i4_s
 	ret void
 }
 
 define void @i5_ls(i5 signext %x) nounwind  {
-	store i5 %x, i5* @i5_s
+	store i5 %x, ptr @i5_s
 	ret void
 }
 
 define void @i6_ls(i6 signext %x) nounwind  {
-	store i6 %x, i6* @i6_s
+	store i6 %x, ptr @i6_s
 	ret void
 }
 
 define void @i7_ls(i7 signext %x) nounwind  {
-	store i7 %x, i7* @i7_s
+	store i7 %x, ptr @i7_s
 	ret void
 }
 
 define void @i8_ls(i8 signext %x) nounwind  {
-	store i8 %x, i8* @i8_s
+	store i8 %x, ptr @i8_s
 	ret void
 }
 
 define void @i9_ls(i9 signext %x) nounwind  {
-	store i9 %x, i9* @i9_s
+	store i9 %x, ptr @i9_s
 	ret void
 }
 
 define void @i10_ls(i10 signext %x) nounwind  {
-	store i10 %x, i10* @i10_s
+	store i10 %x, ptr @i10_s
 	ret void
 }
 
 define void @i11_ls(i11 signext %x) nounwind  {
-	store i11 %x, i11* @i11_s
+	store i11 %x, ptr @i11_s
 	ret void
 }
 
 define void @i12_ls(i12 signext %x) nounwind  {
-	store i12 %x, i12* @i12_s
+	store i12 %x, ptr @i12_s
 	ret void
 }
 
 define void @i13_ls(i13 signext %x) nounwind  {
-	store i13 %x, i13* @i13_s
+	store i13 %x, ptr @i13_s
 	ret void
 }
 
 define void @i14_ls(i14 signext %x) nounwind  {
-	store i14 %x, i14* @i14_s
+	store i14 %x, ptr @i14_s
 	ret void
 }
 
 define void @i15_ls(i15 signext %x) nounwind  {
-	store i15 %x, i15* @i15_s
+	store i15 %x, ptr @i15_s
 	ret void
 }
 
 define void @i16_ls(i16 signext %x) nounwind  {
-	store i16 %x, i16* @i16_s
+	store i16 %x, ptr @i16_s
 	ret void
 }
 
 define void @i17_ls(i17 signext %x) nounwind  {
-	store i17 %x, i17* @i17_s
+	store i17 %x, ptr @i17_s
 	ret void
 }
 
 define void @i18_ls(i18 signext %x) nounwind  {
-	store i18 %x, i18* @i18_s
+	store i18 %x, ptr @i18_s
 	ret void
 }
 
 define void @i19_ls(i19 signext %x) nounwind  {
-	store i19 %x, i19* @i19_s
+	store i19 %x, ptr @i19_s
 	ret void
 }
 
 define void @i20_ls(i20 signext %x) nounwind  {
-	store i20 %x, i20* @i20_s
+	store i20 %x, ptr @i20_s
 	ret void
 }
 
 define void @i21_ls(i21 signext %x) nounwind  {
-	store i21 %x, i21* @i21_s
+	store i21 %x, ptr @i21_s
 	ret void
 }
 
 define void @i22_ls(i22 signext %x) nounwind  {
-	store i22 %x, i22* @i22_s
+	store i22 %x, ptr @i22_s
 	ret void
 }
 
 define void @i23_ls(i23 signext %x) nounwind  {
-	store i23 %x, i23* @i23_s
+	store i23 %x, ptr @i23_s
 	ret void
 }
 
 define void @i24_ls(i24 signext %x) nounwind  {
-	store i24 %x, i24* @i24_s
+	store i24 %x, ptr @i24_s
 	ret void
 }
 
 define void @i25_ls(i25 signext %x) nounwind  {
-	store i25 %x, i25* @i25_s
+	store i25 %x, ptr @i25_s
 	ret void
 }
 
 define void @i26_ls(i26 signext %x) nounwind  {
-	store i26 %x, i26* @i26_s
+	store i26 %x, ptr @i26_s
 	ret void
 }
 
 define void @i27_ls(i27 signext %x) nounwind  {
-	store i27 %x, i27* @i27_s
+	store i27 %x, ptr @i27_s
 	ret void
 }
 
 define void @i28_ls(i28 signext %x) nounwind  {
-	store i28 %x, i28* @i28_s
+	store i28 %x, ptr @i28_s
 	ret void
 }
 
 define void @i29_ls(i29 signext %x) nounwind  {
-	store i29 %x, i29* @i29_s
+	store i29 %x, ptr @i29_s
 	ret void
 }
 
 define void @i30_ls(i30 signext %x) nounwind  {
-	store i30 %x, i30* @i30_s
+	store i30 %x, ptr @i30_s
 	ret void
 }
 
 define void @i31_ls(i31 signext %x) nounwind  {
-	store i31 %x, i31* @i31_s
+	store i31 %x, ptr @i31_s
 	ret void
 }
 
 define void @i32_ls(i32 signext %x) nounwind  {
-	store i32 %x, i32* @i32_s
+	store i32 %x, ptr @i32_s
 	ret void
 }
 
 define void @i33_ls(i33 signext %x) nounwind  {
-	store i33 %x, i33* @i33_s
+	store i33 %x, ptr @i33_s
 	ret void
 }
 
 define void @i34_ls(i34 signext %x) nounwind  {
-	store i34 %x, i34* @i34_s
+	store i34 %x, ptr @i34_s
 	ret void
 }
 
 define void @i35_ls(i35 signext %x) nounwind  {
-	store i35 %x, i35* @i35_s
+	store i35 %x, ptr @i35_s
 	ret void
 }
 
 define void @i36_ls(i36 signext %x) nounwind  {
-	store i36 %x, i36* @i36_s
+	store i36 %x, ptr @i36_s
 	ret void
 }
 
 define void @i37_ls(i37 signext %x) nounwind  {
-	store i37 %x, i37* @i37_s
+	store i37 %x, ptr @i37_s
 	ret void
 }
 
 define void @i38_ls(i38 signext %x) nounwind  {
-	store i38 %x, i38* @i38_s
+	store i38 %x, ptr @i38_s
 	ret void
 }
 
 define void @i39_ls(i39 signext %x) nounwind  {
-	store i39 %x, i39* @i39_s
+	store i39 %x, ptr @i39_s
 	ret void
 }
 
 define void @i40_ls(i40 signext %x) nounwind  {
-	store i40 %x, i40* @i40_s
+	store i40 %x, ptr @i40_s
 	ret void
 }
 
 define void @i41_ls(i41 signext %x) nounwind  {
-	store i41 %x, i41* @i41_s
+	store i41 %x, ptr @i41_s
 	ret void
 }
 
 define void @i42_ls(i42 signext %x) nounwind  {
-	store i42 %x, i42* @i42_s
+	store i42 %x, ptr @i42_s
 	ret void
 }
 
 define void @i43_ls(i43 signext %x) nounwind  {
-	store i43 %x, i43* @i43_s
+	store i43 %x, ptr @i43_s
 	ret void
 }
 
 define void @i44_ls(i44 signext %x) nounwind  {
-	store i44 %x, i44* @i44_s
+	store i44 %x, ptr @i44_s
 	ret void
 }
 
 define void @i45_ls(i45 signext %x) nounwind  {
-	store i45 %x, i45* @i45_s
+	store i45 %x, ptr @i45_s
 	ret void
 }
 
 define void @i46_ls(i46 signext %x) nounwind  {
-	store i46 %x, i46* @i46_s
+	store i46 %x, ptr @i46_s
 	ret void
 }
 
 define void @i47_ls(i47 signext %x) nounwind  {
-	store i47 %x, i47* @i47_s
+	store i47 %x, ptr @i47_s
 	ret void
 }
 
 define void @i48_ls(i48 signext %x) nounwind  {
-	store i48 %x, i48* @i48_s
+	store i48 %x, ptr @i48_s
 	ret void
 }
 
 define void @i49_ls(i49 signext %x) nounwind  {
-	store i49 %x, i49* @i49_s
+	store i49 %x, ptr @i49_s
 	ret void
 }
 
 define void @i50_ls(i50 signext %x) nounwind  {
-	store i50 %x, i50* @i50_s
+	store i50 %x, ptr @i50_s
 	ret void
 }
 
 define void @i51_ls(i51 signext %x) nounwind  {
-	store i51 %x, i51* @i51_s
+	store i51 %x, ptr @i51_s
 	ret void
 }
 
 define void @i52_ls(i52 signext %x) nounwind  {
-	store i52 %x, i52* @i52_s
+	store i52 %x, ptr @i52_s
 	ret void
 }
 
 define void @i53_ls(i53 signext %x) nounwind  {
-	store i53 %x, i53* @i53_s
+	store i53 %x, ptr @i53_s
 	ret void
 }
 
 define void @i54_ls(i54 signext %x) nounwind  {
-	store i54 %x, i54* @i54_s
+	store i54 %x, ptr @i54_s
 	ret void
 }
 
 define void @i55_ls(i55 signext %x) nounwind  {
-	store i55 %x, i55* @i55_s
+	store i55 %x, ptr @i55_s
 	ret void
 }
 
 define void @i56_ls(i56 signext %x) nounwind  {
-	store i56 %x, i56* @i56_s
+	store i56 %x, ptr @i56_s
 	ret void
 }
 
 define void @i57_ls(i57 signext %x) nounwind  {
-	store i57 %x, i57* @i57_s
+	store i57 %x, ptr @i57_s
 	ret void
 }
 
 define void @i58_ls(i58 signext %x) nounwind  {
-	store i58 %x, i58* @i58_s
+	store i58 %x, ptr @i58_s
 	ret void
 }
 
 define void @i59_ls(i59 signext %x) nounwind  {
-	store i59 %x, i59* @i59_s
+	store i59 %x, ptr @i59_s
 	ret void
 }
 
 define void @i60_ls(i60 signext %x) nounwind  {
-	store i60 %x, i60* @i60_s
+	store i60 %x, ptr @i60_s
 	ret void
 }
 
 define void @i61_ls(i61 signext %x) nounwind  {
-	store i61 %x, i61* @i61_s
+	store i61 %x, ptr @i61_s
 	ret void
 }
 
 define void @i62_ls(i62 signext %x) nounwind  {
-	store i62 %x, i62* @i62_s
+	store i62 %x, ptr @i62_s
 	ret void
 }
 
 define void @i63_ls(i63 signext %x) nounwind  {
-	store i63 %x, i63* @i63_s
+	store i63 %x, ptr @i63_s
 	ret void
 }
 
 define void @i64_ls(i64 signext %x) nounwind  {
-	store i64 %x, i64* @i64_s
+	store i64 %x, ptr @i64_s
 	ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/APIntZextParam.ll b/llvm/test/CodeGen/NVPTX/APIntZextParam.ll
index 403462017fb44..9a8517097a00e 100644
--- a/llvm/test/CodeGen/NVPTX/APIntZextParam.ll
+++ b/llvm/test/CodeGen/NVPTX/APIntZextParam.ll
@@ -1,387 +1,387 @@
 ; RUN: llc -march=nvptx  < %s > %t
 ; RUN: llc -march=nvptx64  < %s > %t
 
-@i1_s = external global i1		; <i1*> [#uses=1]
-@i2_s = external global i2		; <i2*> [#uses=1]
-@i3_s = external global i3		; <i3*> [#uses=1]
-@i4_s = external global i4		; <i4*> [#uses=1]
-@i5_s = external global i5		; <i5*> [#uses=1]
-@i6_s = external global i6		; <i6*> [#uses=1]
-@i7_s = external global i7		; <i7*> [#uses=1]
-@i8_s = external global i8		; <i8*> [#uses=1]
-@i9_s = external global i9		; <i9*> [#uses=1]
-@i10_s = external global i10		; <i10*> [#uses=1]
-@i11_s = external global i11		; <i11*> [#uses=1]
-@i12_s = external global i12		; <i12*> [#uses=1]
-@i13_s = external global i13		; <i13*> [#uses=1]
-@i14_s = external global i14		; <i14*> [#uses=1]
-@i15_s = external global i15		; <i15*> [#uses=1]
-@i16_s = external global i16		; <i16*> [#uses=1]
-@i17_s = external global i17		; <i17*> [#uses=1]
-@i18_s = external global i18		; <i18*> [#uses=1]
-@i19_s = external global i19		; <i19*> [#uses=1]
-@i20_s = external global i20		; <i20*> [#uses=1]
-@i21_s = external global i21		; <i21*> [#uses=1]
-@i22_s = external global i22		; <i22*> [#uses=1]
-@i23_s = external global i23		; <i23*> [#uses=1]
-@i24_s = external global i24		; <i24*> [#uses=1]
-@i25_s = external global i25		; <i25*> [#uses=1]
-@i26_s = external global i26		; <i26*> [#uses=1]
-@i27_s = external global i27		; <i27*> [#uses=1]
-@i28_s = external global i28		; <i28*> [#uses=1]
-@i29_s = external global i29		; <i29*> [#uses=1]
-@i30_s = external global i30		; <i30*> [#uses=1]
-@i31_s = external global i31		; <i31*> [#uses=1]
-@i32_s = external global i32		; <i32*> [#uses=1]
-@i33_s = external global i33		; <i33*> [#uses=1]
-@i34_s = external global i34		; <i34*> [#uses=1]
-@i35_s = external global i35		; <i35*> [#uses=1]
-@i36_s = external global i36		; <i36*> [#uses=1]
-@i37_s = external global i37		; <i37*> [#uses=1]
-@i38_s = external global i38		; <i38*> [#uses=1]
-@i39_s = external global i39		; <i39*> [#uses=1]
-@i40_s = external global i40		; <i40*> [#uses=1]
-@i41_s = external global i41		; <i41*> [#uses=1]
-@i42_s = external global i42		; <i42*> [#uses=1]
-@i43_s = external global i43		; <i43*> [#uses=1]
-@i44_s = external global i44		; <i44*> [#uses=1]
-@i45_s = external global i45		; <i45*> [#uses=1]
-@i46_s = external global i46		; <i46*> [#uses=1]
-@i47_s = external global i47		; <i47*> [#uses=1]
-@i48_s = external global i48		; <i48*> [#uses=1]
-@i49_s = external global i49		; <i49*> [#uses=1]
-@i50_s = external global i50		; <i50*> [#uses=1]
-@i51_s = external global i51		; <i51*> [#uses=1]
-@i52_s = external global i52		; <i52*> [#uses=1]
-@i53_s = external global i53		; <i53*> [#uses=1]
-@i54_s = external global i54		; <i54*> [#uses=1]
-@i55_s = external global i55		; <i55*> [#uses=1]
-@i56_s = external global i56		; <i56*> [#uses=1]
-@i57_s = external global i57		; <i57*> [#uses=1]
-@i58_s = external global i58		; <i58*> [#uses=1]
-@i59_s = external global i59		; <i59*> [#uses=1]
-@i60_s = external global i60		; <i60*> [#uses=1]
-@i61_s = external global i61		; <i61*> [#uses=1]
-@i62_s = external global i62		; <i62*> [#uses=1]
-@i63_s = external global i63		; <i63*> [#uses=1]
-@i64_s = external global i64		; <i64*> [#uses=1]
+@i1_s = external global i1		; <ptr> [#uses=1]
+@i2_s = external global i2		; <ptr> [#uses=1]
+@i3_s = external global i3		; <ptr> [#uses=1]
+@i4_s = external global i4		; <ptr> [#uses=1]
+@i5_s = external global i5		; <ptr> [#uses=1]
+@i6_s = external global i6		; <ptr> [#uses=1]
+@i7_s = external global i7		; <ptr> [#uses=1]
+@i8_s = external global i8		; <ptr> [#uses=1]
+@i9_s = external global i9		; <ptr> [#uses=1]
+@i10_s = external global i10		; <ptr> [#uses=1]
+@i11_s = external global i11		; <ptr> [#uses=1]
+@i12_s = external global i12		; <ptr> [#uses=1]
+@i13_s = external global i13		; <ptr> [#uses=1]
+@i14_s = external global i14		; <ptr> [#uses=1]
+@i15_s = external global i15		; <ptr> [#uses=1]
+@i16_s = external global i16		; <ptr> [#uses=1]
+@i17_s = external global i17		; <ptr> [#uses=1]
+@i18_s = external global i18		; <ptr> [#uses=1]
+@i19_s = external global i19		; <ptr> [#uses=1]
+@i20_s = external global i20		; <ptr> [#uses=1]
+@i21_s = external global i21		; <ptr> [#uses=1]
+@i22_s = external global i22		; <ptr> [#uses=1]
+@i23_s = external global i23		; <ptr> [#uses=1]
+@i24_s = external global i24		; <ptr> [#uses=1]
+@i25_s = external global i25		; <ptr> [#uses=1]
+@i26_s = external global i26		; <ptr> [#uses=1]
+@i27_s = external global i27		; <ptr> [#uses=1]
+@i28_s = external global i28		; <ptr> [#uses=1]
+@i29_s = external global i29		; <ptr> [#uses=1]
+@i30_s = external global i30		; <ptr> [#uses=1]
+@i31_s = external global i31		; <ptr> [#uses=1]
+@i32_s = external global i32		; <ptr> [#uses=1]
+@i33_s = external global i33		; <ptr> [#uses=1]
+@i34_s = external global i34		; <ptr> [#uses=1]
+@i35_s = external global i35		; <ptr> [#uses=1]
+@i36_s = external global i36		; <ptr> [#uses=1]
+@i37_s = external global i37		; <ptr> [#uses=1]
+@i38_s = external global i38		; <ptr> [#uses=1]
+@i39_s = external global i39		; <ptr> [#uses=1]
+@i40_s = external global i40		; <ptr> [#uses=1]
+@i41_s = external global i41		; <ptr> [#uses=1]
+@i42_s = external global i42		; <ptr> [#uses=1]
+@i43_s = external global i43		; <ptr> [#uses=1]
+@i44_s = external global i44		; <ptr> [#uses=1]
+@i45_s = external global i45		; <ptr> [#uses=1]
+@i46_s = external global i46		; <ptr> [#uses=1]
+@i47_s = external global i47		; <ptr> [#uses=1]
+@i48_s = external global i48		; <ptr> [#uses=1]
+@i49_s = external global i49		; <ptr> [#uses=1]
+@i50_s = external global i50		; <ptr> [#uses=1]
+@i51_s = external global i51		; <ptr> [#uses=1]
+@i52_s = external global i52		; <ptr> [#uses=1]
+@i53_s = external global i53		; <ptr> [#uses=1]
+@i54_s = external global i54		; <ptr> [#uses=1]
+@i55_s = external global i55		; <ptr> [#uses=1]
+@i56_s = external global i56		; <ptr> [#uses=1]
+@i57_s = external global i57		; <ptr> [#uses=1]
+@i58_s = external global i58		; <ptr> [#uses=1]
+@i59_s = external global i59		; <ptr> [#uses=1]
+@i60_s = external global i60		; <ptr> [#uses=1]
+@i61_s = external global i61		; <ptr> [#uses=1]
+@i62_s = external global i62		; <ptr> [#uses=1]
+@i63_s = external global i63		; <ptr> [#uses=1]
+@i64_s = external global i64		; <ptr> [#uses=1]
 
 define void @i1_ls(i1 zeroext %x) nounwind  {
-	store i1 %x, i1* @i1_s
+	store i1 %x, ptr @i1_s
 	ret void
 }
 
 define void @i2_ls(i2 zeroext %x) nounwind  {
-	store i2 %x, i2* @i2_s
+	store i2 %x, ptr @i2_s
 	ret void
 }
 
 define void @i3_ls(i3 zeroext %x) nounwind  {
-	store i3 %x, i3* @i3_s
+	store i3 %x, ptr @i3_s
 	ret void
 }
 
 define void @i4_ls(i4 zeroext %x) nounwind  {
-	store i4 %x, i4* @i4_s
+	store i4 %x, ptr @i4_s
 	ret void
 }
 
 define void @i5_ls(i5 zeroext %x) nounwind  {
-	store i5 %x, i5* @i5_s
+	store i5 %x, ptr @i5_s
 	ret void
 }
 
 define void @i6_ls(i6 zeroext %x) nounwind  {
-	store i6 %x, i6* @i6_s
+	store i6 %x, ptr @i6_s
 	ret void
 }
 
 define void @i7_ls(i7 zeroext %x) nounwind  {
-	store i7 %x, i7* @i7_s
+	store i7 %x, ptr @i7_s
 	ret void
 }
 
 define void @i8_ls(i8 zeroext %x) nounwind  {
-	store i8 %x, i8* @i8_s
+	store i8 %x, ptr @i8_s
 	ret void
 }
 
 define void @i9_ls(i9 zeroext %x) nounwind  {
-	store i9 %x, i9* @i9_s
+	store i9 %x, ptr @i9_s
 	ret void
 }
 
 define void @i10_ls(i10 zeroext %x) nounwind  {
-	store i10 %x, i10* @i10_s
+	store i10 %x, ptr @i10_s
 	ret void
 }
 
 define void @i11_ls(i11 zeroext %x) nounwind  {
-	store i11 %x, i11* @i11_s
+	store i11 %x, ptr @i11_s
 	ret void
 }
 
 define void @i12_ls(i12 zeroext %x) nounwind  {
-	store i12 %x, i12* @i12_s
+	store i12 %x, ptr @i12_s
 	ret void
 }
 
 define void @i13_ls(i13 zeroext %x) nounwind  {
-	store i13 %x, i13* @i13_s
+	store i13 %x, ptr @i13_s
 	ret void
 }
 
 define void @i14_ls(i14 zeroext %x) nounwind  {
-	store i14 %x, i14* @i14_s
+	store i14 %x, ptr @i14_s
 	ret void
 }
 
 define void @i15_ls(i15 zeroext %x) nounwind  {
-	store i15 %x, i15* @i15_s
+	store i15 %x, ptr @i15_s
 	ret void
 }
 
 define void @i16_ls(i16 zeroext %x) nounwind  {
-	store i16 %x, i16* @i16_s
+	store i16 %x, ptr @i16_s
 	ret void
 }
 
 define void @i17_ls(i17 zeroext %x) nounwind  {
-	store i17 %x, i17* @i17_s
+	store i17 %x, ptr @i17_s
 	ret void
 }
 
 define void @i18_ls(i18 zeroext %x) nounwind  {
-	store i18 %x, i18* @i18_s
+	store i18 %x, ptr @i18_s
 	ret void
 }
 
 define void @i19_ls(i19 zeroext %x) nounwind  {
-	store i19 %x, i19* @i19_s
+	store i19 %x, ptr @i19_s
 	ret void
 }
 
 define void @i20_ls(i20 zeroext %x) nounwind  {
-	store i20 %x, i20* @i20_s
+	store i20 %x, ptr @i20_s
 	ret void
 }
 
 define void @i21_ls(i21 zeroext %x) nounwind  {
-	store i21 %x, i21* @i21_s
+	store i21 %x, ptr @i21_s
 	ret void
 }
 
 define void @i22_ls(i22 zeroext %x) nounwind  {
-	store i22 %x, i22* @i22_s
+	store i22 %x, ptr @i22_s
 	ret void
 }
 
 define void @i23_ls(i23 zeroext %x) nounwind  {
-	store i23 %x, i23* @i23_s
+	store i23 %x, ptr @i23_s
 	ret void
 }
 
 define void @i24_ls(i24 zeroext %x) nounwind  {
-	store i24 %x, i24* @i24_s
+	store i24 %x, ptr @i24_s
 	ret void
 }
 
 define void @i25_ls(i25 zeroext %x) nounwind  {
-	store i25 %x, i25* @i25_s
+	store i25 %x, ptr @i25_s
 	ret void
 }
 
 define void @i26_ls(i26 zeroext %x) nounwind  {
-	store i26 %x, i26* @i26_s
+	store i26 %x, ptr @i26_s
 	ret void
 }
 
 define void @i27_ls(i27 zeroext %x) nounwind  {
-	store i27 %x, i27* @i27_s
+	store i27 %x, ptr @i27_s
 	ret void
 }
 
 define void @i28_ls(i28 zeroext %x) nounwind  {
-	store i28 %x, i28* @i28_s
+	store i28 %x, ptr @i28_s
 	ret void
 }
 
 define void @i29_ls(i29 zeroext %x) nounwind  {
-	store i29 %x, i29* @i29_s
+	store i29 %x, ptr @i29_s
 	ret void
 }
 
 define void @i30_ls(i30 zeroext %x) nounwind  {
-	store i30 %x, i30* @i30_s
+	store i30 %x, ptr @i30_s
 	ret void
 }
 
 define void @i31_ls(i31 zeroext %x) nounwind  {
-	store i31 %x, i31* @i31_s
+	store i31 %x, ptr @i31_s
 	ret void
 }
 
 define void @i32_ls(i32 zeroext %x) nounwind  {
-	store i32 %x, i32* @i32_s
+	store i32 %x, ptr @i32_s
 	ret void
 }
 
 define void @i33_ls(i33 zeroext %x) nounwind  {
-	store i33 %x, i33* @i33_s
+	store i33 %x, ptr @i33_s
 	ret void
 }
 
 define void @i34_ls(i34 zeroext %x) nounwind  {
-	store i34 %x, i34* @i34_s
+	store i34 %x, ptr @i34_s
 	ret void
 }
 
 define void @i35_ls(i35 zeroext %x) nounwind  {
-	store i35 %x, i35* @i35_s
+	store i35 %x, ptr @i35_s
 	ret void
 }
 
 define void @i36_ls(i36 zeroext %x) nounwind  {
-	store i36 %x, i36* @i36_s
+	store i36 %x, ptr @i36_s
 	ret void
 }
 
 define void @i37_ls(i37 zeroext %x) nounwind  {
-	store i37 %x, i37* @i37_s
+	store i37 %x, ptr @i37_s
 	ret void
 }
 
 define void @i38_ls(i38 zeroext %x) nounwind  {
-	store i38 %x, i38* @i38_s
+	store i38 %x, ptr @i38_s
 	ret void
 }
 
 define void @i39_ls(i39 zeroext %x) nounwind  {
-	store i39 %x, i39* @i39_s
+	store i39 %x, ptr @i39_s
 	ret void
 }
 
 define void @i40_ls(i40 zeroext %x) nounwind  {
-	store i40 %x, i40* @i40_s
+	store i40 %x, ptr @i40_s
 	ret void
 }
 
 define void @i41_ls(i41 zeroext %x) nounwind  {
-	store i41 %x, i41* @i41_s
+	store i41 %x, ptr @i41_s
 	ret void
 }
 
 define void @i42_ls(i42 zeroext %x) nounwind  {
-	store i42 %x, i42* @i42_s
+	store i42 %x, ptr @i42_s
 	ret void
 }
 
 define void @i43_ls(i43 zeroext %x) nounwind  {
-	store i43 %x, i43* @i43_s
+	store i43 %x, ptr @i43_s
 	ret void
 }
 
 define void @i44_ls(i44 zeroext %x) nounwind  {
-	store i44 %x, i44* @i44_s
+	store i44 %x, ptr @i44_s
 	ret void
 }
 
 define void @i45_ls(i45 zeroext %x) nounwind  {
-	store i45 %x, i45* @i45_s
+	store i45 %x, ptr @i45_s
 	ret void
 }
 
 define void @i46_ls(i46 zeroext %x) nounwind  {
-	store i46 %x, i46* @i46_s
+	store i46 %x, ptr @i46_s
 	ret void
 }
 
 define void @i47_ls(i47 zeroext %x) nounwind  {
-	store i47 %x, i47* @i47_s
+	store i47 %x, ptr @i47_s
 	ret void
 }
 
 define void @i48_ls(i48 zeroext %x) nounwind  {
-	store i48 %x, i48* @i48_s
+	store i48 %x, ptr @i48_s
 	ret void
 }
 
 define void @i49_ls(i49 zeroext %x) nounwind  {
-	store i49 %x, i49* @i49_s
+	store i49 %x, ptr @i49_s
 	ret void
 }
 
 define void @i50_ls(i50 zeroext %x) nounwind  {
-	store i50 %x, i50* @i50_s
+	store i50 %x, ptr @i50_s
 	ret void
 }
 
 define void @i51_ls(i51 zeroext %x) nounwind  {
-	store i51 %x, i51* @i51_s
+	store i51 %x, ptr @i51_s
 	ret void
 }
 
 define void @i52_ls(i52 zeroext %x) nounwind  {
-	store i52 %x, i52* @i52_s
+	store i52 %x, ptr @i52_s
 	ret void
 }
 
 define void @i53_ls(i53 zeroext %x) nounwind  {
-	store i53 %x, i53* @i53_s
+	store i53 %x, ptr @i53_s
 	ret void
 }
 
 define void @i54_ls(i54 zeroext %x) nounwind  {
-	store i54 %x, i54* @i54_s
+	store i54 %x, ptr @i54_s
 	ret void
 }
 
 define void @i55_ls(i55 zeroext %x) nounwind  {
-	store i55 %x, i55* @i55_s
+	store i55 %x, ptr @i55_s
 	ret void
 }
 
 define void @i56_ls(i56 zeroext %x) nounwind  {
-	store i56 %x, i56* @i56_s
+	store i56 %x, ptr @i56_s
 	ret void
 }
 
 define void @i57_ls(i57 zeroext %x) nounwind  {
-	store i57 %x, i57* @i57_s
+	store i57 %x, ptr @i57_s
 	ret void
 }
 
 define void @i58_ls(i58 zeroext %x) nounwind  {
-	store i58 %x, i58* @i58_s
+	store i58 %x, ptr @i58_s
 	ret void
 }
 
 define void @i59_ls(i59 zeroext %x) nounwind  {
-	store i59 %x, i59* @i59_s
+	store i59 %x, ptr @i59_s
 	ret void
 }
 
 define void @i60_ls(i60 zeroext %x) nounwind  {
-	store i60 %x, i60* @i60_s
+	store i60 %x, ptr @i60_s
 	ret void
 }
 
 define void @i61_ls(i61 zeroext %x) nounwind  {
-	store i61 %x, i61* @i61_s
+	store i61 %x, ptr @i61_s
 	ret void
 }
 
 define void @i62_ls(i62 zeroext %x) nounwind  {
-	store i62 %x, i62* @i62_s
+	store i62 %x, ptr @i62_s
 	ret void
 }
 
 define void @i63_ls(i63 zeroext %x) nounwind  {
-	store i63 %x, i63* @i63_s
+	store i63 %x, ptr @i63_s
 	ret void
 }
 
 define void @i64_ls(i64 zeroext %x) nounwind  {
-	store i64 %x, i64* @i64_s
+	store i64 %x, ptr @i64_s
 	ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
index 94f7991448af9..4f13b6d9d1a8a 100644
--- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
+++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -11,24 +11,24 @@ target triple = "nvptx64-nvidia-cuda"
 ; ENABLED: ld.v2.{{.}}32
 ; DISABLED: ld.{{.}}32
 ; DISABLED: ld.{{.}}32
-define i32 @f(i32* %p) {
-  %p.1 = getelementptr i32, i32* %p, i32 1
-  %v0 = load i32, i32* %p, align 8
-  %v1 = load i32, i32* %p.1, align 4
+define i32 @f(ptr %p) {
+  %p.1 = getelementptr i32, ptr %p, i32 1
+  %v0 = load i32, ptr %p, align 8
+  %v1 = load i32, ptr %p.1, align 4
   %sum = add i32 %v0, %v1
   ret i32 %sum
 }
 
-define half @fh(half* %p) {
-  %p.1 = getelementptr half, half* %p, i32 1
-  %p.2 = getelementptr half, half* %p, i32 2
-  %p.3 = getelementptr half, half* %p, i32 3
-  %p.4 = getelementptr half, half* %p, i32 4
-  %v0 = load half, half* %p, align 64
-  %v1 = load half, half* %p.1, align 4
-  %v2 = load half, half* %p.2, align 4
-  %v3 = load half, half* %p.3, align 4
-  %v4 = load half, half* %p.4, align 4
+define half @fh(ptr %p) {
+  %p.1 = getelementptr half, ptr %p, i32 1
+  %p.2 = getelementptr half, ptr %p, i32 2
+  %p.3 = getelementptr half, ptr %p, i32 3
+  %p.4 = getelementptr half, ptr %p, i32 4
+  %v0 = load half, ptr %p, align 64
+  %v1 = load half, ptr %p.1, align 4
+  %v2 = load half, ptr %p.2, align 4
+  %v3 = load half, ptr %p.3, align 4
+  %v4 = load half, ptr %p.4, align 4
   %sum1 = fadd half %v0, %v1
   %sum2 = fadd half %v2, %v3
   %sum3 = fadd half %sum1, %sum2
@@ -36,16 +36,16 @@ define half @fh(half* %p) {
   ret half %sum
 }
 
-define float @ff(float* %p) {
-  %p.1 = getelementptr float, float* %p, i32 1
-  %p.2 = getelementptr float, float* %p, i32 2
-  %p.3 = getelementptr float, float* %p, i32 3
-  %p.4 = getelementptr float, float* %p, i32 4
-  %v0 = load float, float* %p, align 64
-  %v1 = load float, float* %p.1, align 4
-  %v2 = load float, float* %p.2, align 4
-  %v3 = load float, float* %p.3, align 4
-  %v4 = load float, float* %p.4, align 4
+define float @ff(ptr %p) {
+  %p.1 = getelementptr float, ptr %p, i32 1
+  %p.2 = getelementptr float, ptr %p, i32 2
+  %p.3 = getelementptr float, ptr %p, i32 3
+  %p.4 = getelementptr float, ptr %p, i32 4
+  %v0 = load float, ptr %p, align 64
+  %v1 = load float, ptr %p.1, align 4
+  %v2 = load float, ptr %p.2, align 4
+  %v3 = load float, ptr %p.3, align 4
+  %v4 = load float, ptr %p.4, align 4
   %sum1 = fadd float %v0, %v1
   %sum2 = fadd float %v2, %v3
   %sum3 = fadd float %sum1, %sum2

diff --git a/llvm/test/CodeGen/NVPTX/MachineSink-call.ll b/llvm/test/CodeGen/NVPTX/MachineSink-call.ll
index 72c370222ad3c..ee2535f16fc86 100644
--- a/llvm/test/CodeGen/NVPTX/MachineSink-call.ll
+++ b/llvm/test/CodeGen/NVPTX/MachineSink-call.ll
@@ -8,10 +8,10 @@ declare void @foo()
 ; Load a value, then call a function.  Branch, and use the loaded value only on
 ; one side of the branch.  The load shouldn't be sunk beneath the call, because
 ; the call may modify memory.
-define i32 @f(i32 %x, i32* %ptr, i1 %cond) {
+define i32 @f(i32 %x, ptr %ptr, i1 %cond) {
 Start:
   ; CHECK: ld.u32
-  %ptr_val = load i32, i32* %ptr
+  %ptr_val = load i32, ptr %ptr
   ; CHECK: call.uni
   call void @foo()
   br i1 %cond, label %L1, label %L2

diff --git a/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll b/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
index d3814340c2812..222f147a7d46a 100644
--- a/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
+++ b/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
@@ -8,10 +8,10 @@ declare void @llvm.nvvm.barrier0()
 ; Load a value, then syncthreads.  Branch, and use the loaded value only on one
 ; side of the branch.  The load shouldn't be sunk beneath the call, because
 ; syncthreads is modeled as maystore.
-define i32 @f(i32 %x, i32* %ptr, i1 %cond) {
+define i32 @f(i32 %x, ptr %ptr, i1 %cond) {
 Start:
   ; CHECK: ld.u32
-  %ptr_val = load i32, i32* %ptr
+  %ptr_val = load i32, ptr %ptr
   ; CHECK: bar.sync
   call void @llvm.nvvm.barrier0()
   br i1 %cond, label %L1, label %L2

diff --git a/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll b/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
index c99702e1a60f5..32fcb3bff619b 100644
--- a/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
+++ b/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
@@ -9,16 +9,16 @@ declare void @llvm.nvvm.barrier0()
 ; CHECK: .func call_syncthreads
 ; CHECK: bar.sync
 ; CHECK-NOT: bar.sync
-define void @call_syncthreads(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
+define void @call_syncthreads(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
   br i1 %cond, label %L1, label %L2
   br i1 %cond2, label %Ret, label %L1
 Ret:
   ret void
 L1:
-  store i32 0, i32* %a
+  store i32 0, ptr %a
   br label %L42
 L2:
-  store i32 1, i32* %a
+  store i32 1, ptr %a
   br label %L42
 L42:
   call void @llvm.nvvm.barrier0()
@@ -29,16 +29,16 @@ L42:
 ; CHECK: .func call_foo
 ; CHECK: call
 ; CHECK: call
-define void @call_foo(i32* %a, i32* %b, i1 %cond, i1 %cond2) nounwind {
+define void @call_foo(ptr %a, ptr %b, i1 %cond, i1 %cond2) nounwind {
   br i1 %cond, label %L1, label %L2
   br i1 %cond2, label %Ret, label %L1
 Ret:
   ret void
 L1:
-  store i32 0, i32* %a
+  store i32 0, ptr %a
   br label %L42
 L2:
-  store i32 1, i32* %a
+  store i32 1, ptr %a
   br label %L42
 L42:
   call void @foo()

diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll
index 99971557db436..acc0d9ae79c39 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-gvar.ll
@@ -10,40 +10,40 @@
 
 @g = addrspace(1) global i32 42
 @ga = addrspace(1) global [4 x i8] c"\00\01\02\03"
-@g2 = addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @g to i32*)
-@g3 = addrspace(1) global i32 addrspace(1)* @g
-@g4 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* @g to i32*)}
-@g5 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* @g, i32 2) to i32*)}
+@g2 = addrspace(1) global ptr addrspacecast (ptr addrspace(1) @g to ptr)
+@g3 = addrspace(1) global ptr addrspace(1) @g
+@g4 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) @g to ptr)}
+@g5 = constant {ptr, ptr} {ptr null, ptr addrspacecast (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) @g, i32 2) to ptr)}
 
 ; CHECK: .visible .global .align 4 .u32 g6 = generic(ga)+2;
-@g6 = addrspace(1) global i8* getelementptr inbounds (
-  [4 x i8], [4 x i8]* addrspacecast ([4 x i8] addrspace(1)* @ga to [4 x i8]*),
+@g6 = addrspace(1) global ptr getelementptr inbounds (
+  [4 x i8], ptr addrspacecast (ptr addrspace(1) @ga to ptr),
   i32 0, i32 2
 )
 
 ; CHECK: .visible .global .align 4 .u32 g7 = generic(g);
-@g7 = addrspace(1) global i8* addrspacecast (
-  i8 addrspace(1)* bitcast (i32 addrspace(1)* @g to i8 addrspace(1)*)
-  to i8*
+@g7 = addrspace(1) global ptr addrspacecast (
+  ptr addrspace(1) @g
+  to ptr
 )
 
 ; CHECK: .visible .global .align 4 .u32 g8[2] = {0, g};
-@g8 = addrspace(1) global [2 x i32 addrspace(1)*] [i32 addrspace(1)* null, i32 addrspace(1)* @g]
+@g8 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @g]
 
 ; CHECK: .visible .global .align 4 .u32 g9[2] = {0, generic(g)};
-@g9 = addrspace(1) global [2 x i32*] [
-  i32* null,
-  i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+@g9 = addrspace(1) global [2 x ptr] [
+  ptr null,
+  ptr addrspacecast (ptr addrspace(1) @g to ptr)
 ]
 
 ; CHECK: .visible .global .align 4 .u32 g10[2] = {0, g};
-@g10 = addrspace(1) global [2 x i8 addrspace(1)*] [
-  i8 addrspace(1)* null,
-  i8 addrspace(1)* bitcast (i32 addrspace(1)* @g to i8 addrspace(1)*)
+@g10 = addrspace(1) global [2 x ptr addrspace(1)] [
+  ptr addrspace(1) null,
+  ptr addrspace(1) @g
 ]
 
 ; CHECK: .visible .global .align 4 .u32 g11[2] = {0, generic(g)};
-@g11 = addrspace(1) global [2 x i8*] [
-  i8* null,
-  i8* bitcast (i32* addrspacecast (i32 addrspace(1)* @g to i32*) to i8*)
+@g11 = addrspace(1) global [2 x ptr] [
+  ptr null,
+  ptr addrspacecast (ptr addrspace(1) @g to ptr)
 ]

diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
index b8d169b9c9f35..8c839eb4e8ed6 100644
--- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll
+++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
@@ -6,17 +6,17 @@ declare <3 x float> @barv3(<3 x float> %input)
 declare [2 x float] @bara([2 x float] %input)
 declare {float, float} @bars({float, float} %input)
 
-define void @test_v2f32(<2 x float> %input, <2 x float>* %output) {
+define void @test_v2f32(<2 x float> %input, ptr %output) {
 ; CHECK-LABEL: @test_v2f32
   %call = tail call <2 x float> @barv(<2 x float> %input)
 ; CHECK: .param .align 8 .b8 retval0[8];
 ; CHECK: ld.param.v2.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]]}, [retval0+0];
-  store <2 x float> %call, <2 x float>* %output, align 8
+  store <2 x float> %call, ptr %output, align 8
 ; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[E0]], [[E1]]}
   ret void
 }
 
-define void @test_v3f32(<3 x float> %input, <3 x float>* %output) {
+define void @test_v3f32(<3 x float> %input, ptr %output) {
 ; CHECK-LABEL: @test_v3f32
 ;
   %call = tail call <3 x float> @barv3(<3 x float> %input)
@@ -25,7 +25,7 @@ define void @test_v3f32(<3 x float> %input, <3 x float>* %output) {
 ; CHECK-DAG: ld.param.f32 [[E2:%f[0-9]+]], [retval0+8];
 ; Make sure we don't load more values than we need to.
 ; CHECK-NOT: ld.param.f32 [[E3:%f[0-9]+]], [retval0+12];
-  store <3 x float> %call, <3 x float>* %output, align 8
+  store <3 x float> %call, ptr %output, align 8
 ; CHECK-DAG: st.f32 [{{%rd[0-9]}}+8],
 ; -- This is suboptimal. We should do st.v2.f32 instead
 ;    of combining 2xf32 into i64.
@@ -34,13 +34,13 @@ define void @test_v3f32(<3 x float> %input, <3 x float>* %output) {
   ret void
 }
 
-define void @test_a2f32([2 x float] %input, [2 x float]* %output) {
+define void @test_a2f32([2 x float] %input, ptr %output) {
 ; CHECK-LABEL: @test_a2f32
   %call = tail call [2 x float] @bara([2 x float] %input)
 ; CHECK: .param .align 4 .b8 retval0[8];
 ; CHECK-DAG: ld.param.f32 [[ELEMA1:%f[0-9]+]], [retval0+0];
 ; CHECK-DAG: ld.param.f32 [[ELEMA2:%f[0-9]+]], [retval0+4];
-  store [2 x float] %call, [2 x float]* %output, align 4
+  store [2 x float] %call, ptr %output, align 4
 ; CHECK: }
 ; CHECK-DAG: st.f32 [{{%rd[0-9]+}}], [[ELEMA1]]
 ; CHECK-DAG: st.f32 [{{%rd[0-9]+}}+4], [[ELEMA2]]
@@ -48,13 +48,13 @@ define void @test_a2f32([2 x float] %input, [2 x float]* %output) {
 ; CHECK: ret
 }
 
-define void @test_s2f32({float, float} %input, {float, float}* %output) {
+define void @test_s2f32({float, float} %input, ptr %output) {
 ; CHECK-LABEL: @test_s2f32
   %call = tail call {float, float} @bars({float, float} %input)
 ; CHECK: .param .align 4 .b8 retval0[8];
 ; CHECK-DAG: ld.param.f32 [[ELEMS1:%f[0-9]+]], [retval0+0];
 ; CHECK-DAG: ld.param.f32 [[ELEMS2:%f[0-9]+]], [retval0+4];
-  store {float, float} %call, {float, float}* %output, align 4
+  store {float, float} %call, ptr %output, align 4
 ; CHECK: }
 ; CHECK-DAG: st.f32 [{{%rd[0-9]+}}], [[ELEMS1]]
 ; CHECK-DAG: st.f32 [{{%rd[0-9]+}}+4], [[ELEMS2]]

diff --git a/llvm/test/CodeGen/NVPTX/alias.ll b/llvm/test/CodeGen/NVPTX/alias.ll
index 6dad3845b0862..6124a7c0c8e32 100644
--- a/llvm/test/CodeGen/NVPTX/alias.ll
+++ b/llvm/test/CodeGen/NVPTX/alias.ll
@@ -4,4 +4,4 @@
 
 define i32 @a() { ret i32 0 }
 ; CHECK: ERROR: Module has aliases
-@b = internal alias i32 (), i32 ()* @a
+@b = internal alias i32 (), ptr @a

diff --git a/llvm/test/CodeGen/NVPTX/annotations.ll b/llvm/test/CodeGen/NVPTX/annotations.ll
index 83ef559d6a4ae..05a0944530018 100644
--- a/llvm/test/CodeGen/NVPTX/annotations.ll
+++ b/llvm/test/CodeGen/NVPTX/annotations.ll
@@ -9,21 +9,21 @@
 ; CHECK: .global .surfref surface
 
 ; CHECK: .entry kernel_func_maxntid
-define void @kernel_func_maxntid(float* %a) {
+define void @kernel_func_maxntid(ptr %a) {
 ; CHECK: .maxntid 10, 20, 30
 ; CHECK: ret
   ret void
 }
 
 ; CHECK: .entry kernel_func_reqntid
-define void @kernel_func_reqntid(float* %a) {
+define void @kernel_func_reqntid(ptr %a) {
 ; CHECK: .reqntid 11, 22, 33
 ; CHECK: ret
   ret void
 }
 
 ; CHECK: .entry kernel_func_minctasm
-define void @kernel_func_minctasm(float* %a) {
+define void @kernel_func_minctasm(ptr %a) {
 ; CHECK: .minnctapersm 42
 ; CHECK: ret
   ret void
@@ -38,17 +38,17 @@ define void @kernel_func_maxnreg() {
 
 !nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
 
-!1 = !{void (float*)* @kernel_func_maxntid, !"kernel", i32 1}
-!2 = !{void (float*)* @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
+!1 = !{ptr @kernel_func_maxntid, !"kernel", i32 1}
+!2 = !{ptr @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
 
-!3 = !{void (float*)* @kernel_func_reqntid, !"kernel", i32 1}
-!4 = !{void (float*)* @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
+!3 = !{ptr @kernel_func_reqntid, !"kernel", i32 1}
+!4 = !{ptr @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
 
-!5 = !{void (float*)* @kernel_func_minctasm, !"kernel", i32 1}
-!6 = !{void (float*)* @kernel_func_minctasm, !"minctasm", i32 42}
+!5 = !{ptr @kernel_func_minctasm, !"kernel", i32 1}
+!6 = !{ptr @kernel_func_minctasm, !"minctasm", i32 42}
 
-!7 = !{void ()* @kernel_func_maxnreg, !"kernel", i32 1}
-!8 = !{void ()* @kernel_func_maxnreg, !"maxnreg", i32 1234}
+!7 = !{ptr @kernel_func_maxnreg, !"kernel", i32 1}
+!8 = !{ptr @kernel_func_maxnreg, !"maxnreg", i32 1234}
 
-!9 = !{i64 addrspace(1)* @texture, !"texture", i32 1}
-!10 = !{i64 addrspace(1)* @surface, !"surface", i32 1}
+!9 = !{ptr addrspace(1) @texture, !"texture", i32 1}
+!10 = !{ptr addrspace(1) @surface, !"surface", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/async-copy.ll b/llvm/test/CodeGen/NVPTX/async-copy.ll
index fe021ca5bebca..55c7a6d4874c5 100644
--- a/llvm/test/CodeGen/NVPTX/async-copy.ll
+++ b/llvm/test/CodeGen/NVPTX/async-copy.ll
@@ -34,79 +34,79 @@ define void @asynccommitgroup() {
   ret void
 }
 
-declare void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
-declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
+declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
 
 ; CHECK-LABEL: asyncmbarrier
-define void @asyncmbarrier(i64* %a) {
+define void @asyncmbarrier(ptr %a) {
 ; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%r{{[0-9]+}}];
 ; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%rd{{[0-9]+}}];
-  tail call void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
+  tail call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
   ret void
 }
 
 ; CHECK-LABEL: asyncmbarriershared
-define void @asyncmbarriershared(i64 addrspace(3)* %a) {
+define void @asyncmbarriershared(ptr addrspace(3) %a) {
 ; CHECK_PTX32: cp.async.mbarrier.arrive.shared.b64 [%r{{[0-9]+}}];
 ; CHECK_PTX64: cp.async.mbarrier.arrive.shared.b64 [%rd{{[0-9]+}}];
-  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
   ret void
 }
 
 ; CHECK-LABEL: asyncmbarriernoinc
-define void @asyncmbarriernoinc(i64* %a) {
+define void @asyncmbarriernoinc(ptr %a) {
 ; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.b64 [%r{{[0-9]+}}];
 ; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%rd{{[0-9]+}}];
-  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
+  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
   ret void
 }
 
 ; CHECK-LABEL: asyncmbarriernoincshared
-define void @asyncmbarriernoincshared(i64 addrspace(3)* %a) {
+define void @asyncmbarriernoincshared(ptr addrspace(3) %a) {
 ; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.shared.b64 [%r{{[0-9]+}}];
 ; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.shared.b64 [%rd{{[0-9]+}}];
-  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
+  tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
   ret void
 }
 
-declare void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
 
 ; CHECK-LABEL: asynccasharedglobal4i8
-define void @asynccasharedglobal4i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal4i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 4;
 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 4;
-  tail call void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+  tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
   ret void
 }
 
-declare void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
 
 ; CHECK-LABEL: asynccasharedglobal8i8
-define void @asynccasharedglobal8i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal8i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 8;
 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 8;
-  tail call void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+  tail call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
   ret void
 }
 
-declare void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
 
 ; CHECK-LABEL: asynccasharedglobal16i8
-define void @asynccasharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccasharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
-  tail call void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+  tail call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
   ret void
 }
 
-declare void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+declare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
 
 ; CHECK-LABEL: asynccgsharedglobal16i8
-define void @asynccgsharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
+define void @asynccgsharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b) {
 ; CHECK_PTX32: cp.async.cg.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
 ; CHECK_PTX64: cp.async.cg.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
-  tail call void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
+  tail call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
index 07694a91d3617..8f272dcfe3228 100644
--- a/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
+++ b/llvm/test/CodeGen/NVPTX/atomic-lower-local.ll
@@ -7,12 +7,12 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 target triple = "nvptx64-unknown-unknown"
 
-define double @kernel(double addrspace(5)* %ptr, double %val) {
-  %res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
+define double @kernel(ptr addrspace(5) %ptr, double %val) {
+  %res = atomicrmw fadd ptr addrspace(5) %ptr, double %val monotonic, align 8
   ret double %res
-; CHECK:   %1 = load double, double addrspace(5)* %ptr, align 8
+; CHECK:   %1 = load double, ptr addrspace(5) %ptr, align 8
 ; CHECK-NEXT:   %new = fadd double %1, %val
-; CHECK-NEXT:   store double %new, double addrspace(5)* %ptr, align 8
+; CHECK-NEXT:   store double %new, ptr addrspace(5) %ptr, align 8
 ; CHECK-NEXT:   ret double %1
 }
 

diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
index 405a547bc3609..624ecb48c3433 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm60.ll
@@ -4,29 +4,29 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test(
-define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
 ; CHECK: atom.add.f64
-  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
+  %r1 = call double @llvm.nvvm.atomic.load.add.f64.p0(ptr %dp0, double %d)
 ; CHECK: atom.global.add.f64
-  %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
+  %r2 = call double @llvm.nvvm.atomic.load.add.f64.p1(ptr addrspace(1) %dp1, double %d)
 ; CHECK: atom.shared.add.f64
-  %ret = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
+  %ret = call double @llvm.nvvm.atomic.load.add.f64.p3(ptr addrspace(3) %dp3, double %d)
   ret void
 }
 
 ; CHECK-LABEL: .func test2(
-define void @test2(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
+define void @test2(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, double %d) {
 ; CHECK: atom.add.f64
-  %r1 = atomicrmw fadd double* %dp0, double %d seq_cst
+  %r1 = atomicrmw fadd ptr %dp0, double %d seq_cst
 ; CHECK: atom.global.add.f64
-  %r2 = atomicrmw fadd double addrspace(1)* %dp1, double %d seq_cst
+  %r2 = atomicrmw fadd ptr addrspace(1) %dp1, double %d seq_cst
 ; CHECK: atom.shared.add.f64
-  %ret = atomicrmw fadd double addrspace(3)* %dp3, double %d seq_cst
+  %ret = atomicrmw fadd ptr addrspace(3) %dp3, double %d seq_cst
   ret void
 }
 
-declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
-declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
-declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p0(ptr nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p1(ptr addrspace(1) nocapture, double) #1
+declare double @llvm.nvvm.atomic.load.add.f64.p3(ptr addrspace(3) nocapture, double) #1
 
 attributes #1 = { argmemonly nounwind }

diff --git a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
index c40f4e54f9bf1..80643125a4c17 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-with-scope.ll
@@ -4,101 +4,101 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify -arch=sm_60 %}
 
 ; CHECK-LABEL: .func test_atomics_scope(
-define void @test_atomics_scope(float* %fp, float %f,
-                                double* %dfp, double %df,
-                                i32* %ip, i32 %i,
-                                i32* %uip, i32 %ui,
-                                i64* %llp, i64 %ll) #0 {
+define void @test_atomics_scope(ptr %fp, float %f,
+                                ptr %dfp, double %df,
+                                ptr %ip, i32 %i,
+                                ptr %uip, i32 %ui,
+                                ptr %llp, i64 %ll) #0 {
 entry:
 ; CHECK: atom.cta.add.s32
-  %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp36 = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.add.u64
-  %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp38 = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.add.s32
-  %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp39 = tail call i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.add.u64
-  %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp41 = tail call i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.cta.add.f32
-  %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
+  %tmp42 = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
 ; CHECK: atom.cta.add.f64
-  %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
+  %tmp43 = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
 ; CHECK: atom.sys.add.f32
-  %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* %fp, float %f)
+  %tmp44 = tail call float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr %fp, float %f)
 ; CHECK: atom.sys.add.f64
-  %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* %dfp, double %df)
+  %tmp45 = tail call double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr %dfp, double %df)
 
 ; CHECK: atom.cta.exch.b32
-  %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp46 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.exch.b64
-  %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp48 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.exch.b32
-  %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp49 = tail call i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.exch.b64
-  %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp51 = tail call i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.max.s32
-  %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp52 = tail call i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.max.s64
-  %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp56 = tail call i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.max.s32
-  %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp58 = tail call i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.max.s64
-  %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp62 = tail call i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.min.s32
-  %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp64 = tail call i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.min.s64
-  %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp68 = tail call i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.min.s32
-  %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp70 = tail call i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.min.s64
-  %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp74 = tail call i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.inc.u32
-  %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp76 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.inc.u32
-  %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp77 = tail call i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 
 ; CHECK: atom.cta.dec.u32
-  %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp78 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.dec.u32
-  %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp79 = tail call i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 
 ; CHECK: atom.cta.and.b32
-  %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp80 = tail call i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.and.b64
-  %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp82 = tail call i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.and.b32
-  %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp83 = tail call i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.and.b64
-  %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp85 = tail call i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.or.b32
-  %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp86 = tail call i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.or.b64
-  %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp88 = tail call i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.or.b32
-  %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp89 = tail call i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.or.b64
-  %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp91 = tail call i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.xor.b32
-  %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp92 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.xor.b64
-  %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp94 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.sys.xor.b32
-  %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* %ip, i32 %i)
+  %tmp95 = tail call i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.sys.xor.b64
-  %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp97 = tail call i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr %llp, i64 %ll)
 
 ; CHECK: atom.cta.cas.b32
-  %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+  %tmp98 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
 ; CHECK: atom.cta.cas.b64
-  %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
+  %tmp100 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr %llp, i64 %ll, i64 %ll)
 ; CHECK: atom.sys.cas.b32
-  %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+  %tmp101 = tail call i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr %ip, i32 %i, i32 %i)
 ; CHECK: atom.sys.cas.b64
-  %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* %llp, i64 %ll, i64 %ll)
+  %tmp103 = tail call i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr %llp, i64 %ll, i64 %ll)
 
   ; CHECK: ret
   ret void
@@ -106,84 +106,84 @@ entry:
 
 ; Make sure we use constants as operands to our scoped atomic calls, where appropriate.
 ; CHECK-LABEL: .func test_atomics_scope_imm(
-define void @test_atomics_scope_imm(float* %fp, float %f,
-                                    double* %dfp, double %df,
-                                    i32* %ip, i32 %i,
-                                    i32* %uip, i32 %ui,
-                                    i64* %llp, i64 %ll) #0 {
+define void @test_atomics_scope_imm(ptr %fp, float %f,
+                                    ptr %dfp, double %df,
+                                    ptr %ip, i32 %i,
+                                    ptr %uip, i32 %ui,
+                                    ptr %llp, i64 %ll) #0 {
 
 ; CHECK: atom.cta.add.s32{{.*}} %r{{[0-9]+}};
-  %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 %i)
+  %tmp1r = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 %i)
 ; CHECK: atom.cta.add.s32{{.*}}, 1;
-  %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* %ip, i32 1)
+  %tmp1i = tail call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %ip, i32 1)
 ; CHECK: atom.cta.add.u64{{.*}}, %rd{{[0-9]+}};
-  %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 %ll)
+  %tmp2r = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 %ll)
 ; CHECK: atom.cta.add.u64{{.*}}, 2;
-  %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* %llp, i64 2)
+  %tmp2i = tail call i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr %llp, i64 2)
 
 ; CHECK: atom.cta.add.f32{{.*}}, %f{{[0-9]+}};
-  %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float %f)
+  %tmp3r = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float %f)
 ; CHECK: atom.cta.add.f32{{.*}}, 0f40400000;
-  %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* %fp, float 3.0)
+  %tmp3i = tail call float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr %fp, float 3.0)
 ; CHECK: atom.cta.add.f64{{.*}}, %fd{{[0-9]+}};
-  %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double %df)
+  %tmp4r = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double %df)
 ; CHECK: atom.cta.add.f64{{.*}}, 0d4010000000000000;
-  %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* %dfp, double 4.0)
+  %tmp4i = tail call double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr %dfp, double 4.0)
 
 ; CAS is implemented separately and has more arguments
 ; CHECK: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, %r{{[0-9+]}};
-  %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 %i)
+  %tmp5rr = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 %i)
 ; For some reason in 64-bit mode we end up passing 51 via a register.
 ; CHECK32: atom.cta.cas.b32{{.*}}], %r{{[0-9+]}}, 51;
-  %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 %i, i32 51)
+  %tmp5ri = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 %i, i32 51)
 ; CHECK: atom.cta.cas.b32{{.*}}], 52, %r{{[0-9+]}};
-  %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 52, i32 %i)
+  %tmp5ir = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 52, i32 %i)
 ; CHECK: atom.cta.cas.b32{{.*}}], 53, 54;
-  %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* %ip, i32 53, i32 54)
+  %tmp5ii = tail call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %ip, i32 53, i32 54)
 
   ; CHECK: ret
   ret void
 }
 
-declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0f32(float* nocapture, float) #1
-declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0f64(double* nocapture, double) #1
-declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0f32(float* nocapture, float) #1
-declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0f64(double* nocapture, double) #1
-declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0i32(i32* nocapture, i32) #1
-declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0i64(i64* nocapture, i64) #1
-declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0i32(i32* nocapture, i32, i32) #1
-declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0i64(i64* nocapture, i64, i64) #1
-declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0i32(i32* nocapture, i32, i32) #1
-declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0i64(i64* nocapture, i64, i64) #1
+declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.add.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.add.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.add.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare float @llvm.nvvm.atomic.add.gen.f.cta.f32.p0(ptr nocapture, float) #1
+declare double @llvm.nvvm.atomic.add.gen.f.cta.f64.p0(ptr nocapture, double) #1
+declare float @llvm.nvvm.atomic.add.gen.f.sys.f32.p0(ptr nocapture, float) #1
+declare double @llvm.nvvm.atomic.add.gen.f.sys.f64.p0(ptr nocapture, double) #1
+declare i32 @llvm.nvvm.atomic.exch.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.exch.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.exch.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.exch.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.max.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.max.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.max.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.max.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.min.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.min.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.min.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.min.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.inc.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.inc.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.dec.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.dec.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i32 @llvm.nvvm.atomic.and.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.and.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.and.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.and.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.or.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.or.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.or.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.or.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.xor.gen.i.cta.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.xor.gen.i.cta.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.xor.gen.i.sys.i32.p0(ptr nocapture, i32) #1
+declare i64 @llvm.nvvm.atomic.xor.gen.i.sys.i64.p0(ptr nocapture, i64) #1
+declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr nocapture, i32, i32) #1
+declare i64 @llvm.nvvm.atomic.cas.gen.i.cta.i64.p0(ptr nocapture, i64, i64) #1
+declare i32 @llvm.nvvm.atomic.cas.gen.i.sys.i32.p0(ptr nocapture, i32, i32) #1
+declare i64 @llvm.nvvm.atomic.cas.gen.i.sys.i64.p0(ptr nocapture, i64, i64) #1
 
 attributes #1 = { argmemonly nounwind }
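After the conversion, the only pointer property left in an NVVM intrinsic's overload suffix is the address space: .p0i32, .p0i64, .p0f32 and friends all collapse to .p0. As a minimal sketch (the function name @demo is made up; the intrinsic declarations are the ones from the test above):

declare i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr nocapture, i32)
declare i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr nocapture, i32, i32)

define i32 @demo(ptr %p, i32 %v) {
  ; the same untyped ptr feeds both forms; cas additionally takes a compare value
  %old = call i32 @llvm.nvvm.atomic.add.gen.i.cta.i32.p0(ptr %p, i32 %v)
  %cas = call i32 @llvm.nvvm.atomic.cas.gen.i.cta.i32.p0(ptr %p, i32 %old, i32 %v)
  ret i32 %cas
}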

diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index d0e1b836a4b61..9cf600e0939f2 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -3,202 +3,202 @@
 
 
 ; CHECK-LABEL: atom0
-define i32 @atom0(i32* %addr, i32 %val) {
+define i32 @atom0(ptr %addr, i32 %val) {
 ; CHECK: atom.add.u32
-  %ret = atomicrmw add i32* %addr, i32 %val seq_cst
+  %ret = atomicrmw add ptr %addr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom1
-define i64 @atom1(i64* %addr, i64 %val) {
+define i64 @atom1(ptr %addr, i64 %val) {
 ; CHECK: atom.add.u64
-  %ret = atomicrmw add i64* %addr, i64 %val seq_cst
+  %ret = atomicrmw add ptr %addr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom2
-define i32 @atom2(i32* %subr, i32 %val) {
+define i32 @atom2(ptr %subr, i32 %val) {
 ; CHECK: neg.s32
 ; CHECK: atom.add.u32
-  %ret = atomicrmw sub i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw sub ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom3
-define i64 @atom3(i64* %subr, i64 %val) {
+define i64 @atom3(ptr %subr, i64 %val) {
 ; CHECK: neg.s64
 ; CHECK: atom.add.u64
-  %ret = atomicrmw sub i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw sub ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom4
-define i32 @atom4(i32* %subr, i32 %val) {
+define i32 @atom4(ptr %subr, i32 %val) {
 ; CHECK: atom.and.b32
-  %ret = atomicrmw and i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw and ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom5
-define i64 @atom5(i64* %subr, i64 %val) {
+define i64 @atom5(ptr %subr, i64 %val) {
 ; CHECK: atom.and.b64
-  %ret = atomicrmw and i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw and ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ;; NAND not yet supported
-;define i32 @atom6(i32* %subr, i32 %val) {
-;  %ret = atomicrmw nand i32* %subr, i32 %val seq_cst
+;define i32 @atom6(ptr %subr, i32 %val) {
+;  %ret = atomicrmw nand ptr %subr, i32 %val seq_cst
 ;  ret i32 %ret
 ;}
 
-;define i64 @atom7(i64* %subr, i64 %val) {
-;  %ret = atomicrmw nand i64* %subr, i64 %val seq_cst
+;define i64 @atom7(ptr %subr, i64 %val) {
+;  %ret = atomicrmw nand ptr %subr, i64 %val seq_cst
 ;  ret i64 %ret
 ;}
 
 ; CHECK-LABEL: atom8
-define i32 @atom8(i32* %subr, i32 %val) {
+define i32 @atom8(ptr %subr, i32 %val) {
 ; CHECK: atom.or.b32
-  %ret = atomicrmw or i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw or ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom9
-define i64 @atom9(i64* %subr, i64 %val) {
+define i64 @atom9(ptr %subr, i64 %val) {
 ; CHECK: atom.or.b64
-  %ret = atomicrmw or i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw or ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom10
-define i32 @atom10(i32* %subr, i32 %val) {
+define i32 @atom10(ptr %subr, i32 %val) {
 ; CHECK: atom.xor.b32
-  %ret = atomicrmw xor i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw xor ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom11
-define i64 @atom11(i64* %subr, i64 %val) {
+define i64 @atom11(ptr %subr, i64 %val) {
 ; CHECK: atom.xor.b64
-  %ret = atomicrmw xor i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw xor ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom12
-define i32 @atom12(i32* %subr, i32 %val) {
+define i32 @atom12(ptr %subr, i32 %val) {
 ; CHECK: atom.max.s32
-  %ret = atomicrmw max i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw max ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom13
-define i64 @atom13(i64* %subr, i64 %val) {
+define i64 @atom13(ptr %subr, i64 %val) {
 ; CHECK: atom.max.s64
-  %ret = atomicrmw max i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw max ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom14
-define i32 @atom14(i32* %subr, i32 %val) {
+define i32 @atom14(ptr %subr, i32 %val) {
 ; CHECK: atom.min.s32
-  %ret = atomicrmw min i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw min ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom15
-define i64 @atom15(i64* %subr, i64 %val) {
+define i64 @atom15(ptr %subr, i64 %val) {
 ; CHECK: atom.min.s64
-  %ret = atomicrmw min i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw min ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom16
-define i32 @atom16(i32* %subr, i32 %val) {
+define i32 @atom16(ptr %subr, i32 %val) {
 ; CHECK: atom.max.u32
-  %ret = atomicrmw umax i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw umax ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom17
-define i64 @atom17(i64* %subr, i64 %val) {
+define i64 @atom17(ptr %subr, i64 %val) {
 ; CHECK: atom.max.u64
-  %ret = atomicrmw umax i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw umax ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
 ; CHECK-LABEL: atom18
-define i32 @atom18(i32* %subr, i32 %val) {
+define i32 @atom18(ptr %subr, i32 %val) {
 ; CHECK: atom.min.u32
-  %ret = atomicrmw umin i32* %subr, i32 %val seq_cst
+  %ret = atomicrmw umin ptr %subr, i32 %val seq_cst
   ret i32 %ret
 }
 
 ; CHECK-LABEL: atom19
-define i64 @atom19(i64* %subr, i64 %val) {
+define i64 @atom19(ptr %subr, i64 %val) {
 ; CHECK: atom.min.u64
-  %ret = atomicrmw umin i64* %subr, i64 %val seq_cst
+  %ret = atomicrmw umin ptr %subr, i64 %val seq_cst
   ret i64 %ret
 }
 
-declare float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
 
 ; CHECK-LABEL: atomic_add_f32_generic
-define float @atomic_add_f32_generic(float* %addr, float %val) {
+define float @atomic_add_f32_generic(ptr %addr, float %val) {
 ; CHECK: atom.add.f32
-  %ret = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %addr, float %val)
+  %ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
   ret float %ret
 }
 
-declare float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
 
 ; CHECK-LABEL: atomic_add_f32_addrspace1
-define float @atomic_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
+define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
 ; CHECK: atom.global.add.f32
-  %ret = call float @llvm.nvvm.atomic.load.add.f32.p1f32(float addrspace(1)* %addr, float %val)
+  %ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
   ret float %ret
 }
 
-declare float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)
+declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
 
 ; CHECK-LABEL: atomic_add_f32_addrspace3
-define float @atomic_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
+define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
 ; CHECK: atom.shared.add.f32
-  %ret = call float @llvm.nvvm.atomic.load.add.f32.p3f32(float addrspace(3)* %addr, float %val)
+  %ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
   ret float %ret
 }
 
 ; CHECK-LABEL: atomicrmw_add_f32_generic
-define float @atomicrmw_add_f32_generic(float* %addr, float %val) {
+define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
 ; CHECK: atom.add.f32
-  %ret = atomicrmw fadd float* %addr, float %val seq_cst
+  %ret = atomicrmw fadd ptr %addr, float %val seq_cst
   ret float %ret
 }
 
 ; CHECK-LABEL: atomicrmw_add_f32_addrspace1
-define float @atomicrmw_add_f32_addrspace1(float addrspace(1)* %addr, float %val) {
+define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
 ; CHECK: atom.global.add.f32
-  %ret = atomicrmw fadd float addrspace(1)* %addr, float %val seq_cst
+  %ret = atomicrmw fadd ptr addrspace(1) %addr, float %val seq_cst
   ret float %ret
 }
 
 ; CHECK-LABEL: atomicrmw_add_f32_addrspace3
-define float @atomicrmw_add_f32_addrspace3(float addrspace(3)* %addr, float %val) {
+define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
 ; CHECK: atom.shared.add.f32
-  %ret = atomicrmw fadd float addrspace(3)* %addr, float %val seq_cst
+  %ret = atomicrmw fadd ptr addrspace(3) %addr, float %val seq_cst
   ret float %ret
 }
 
 ; CHECK-LABEL: atomic_cmpxchg_i32
-define i32 @atomic_cmpxchg_i32(i32* %addr, i32 %cmp, i32 %new) {
+define i32 @atomic_cmpxchg_i32(ptr %addr, i32 %cmp, i32 %new) {
 ; CHECK: atom.cas.b32
-  %pairold = cmpxchg i32* %addr, i32 %cmp, i32 %new seq_cst seq_cst
+  %pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
   ret i32 %new
 }
 
 ; CHECK-LABEL: atomic_cmpxchg_i64
-define i64 @atomic_cmpxchg_i64(i64* %addr, i64 %cmp, i64 %new) {
+define i64 @atomic_cmpxchg_i64(ptr %addr, i64 %cmp, i64 %new) {
 ; CHECK: atom.cas.b64
-  %pairold = cmpxchg i64* %addr, i64 %cmp, i64 %new seq_cst seq_cst
+  %pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
   ret i64 %new
 }
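Note that atomicrmw and cmpxchg never carried the pointee type in a mangled name, so with opaque pointers the instruction needs no pointer type at all; the value operands determine it. The tests above return %new for brevity; extracting the loaded value from a cmpxchg result pair looks like this (a sketch, @swap_old is illustrative):

define i32 @swap_old(ptr %addr, i32 %cmp, i32 %new) {
  %pair = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
  ; element 0 is the value that was loaded, element 1 is the success flag
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}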

diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll
index 9066dc2f07dd1..d9322dabfa065 100644
--- a/llvm/test/CodeGen/NVPTX/b52037.ll
+++ b/llvm/test/CodeGen/NVPTX/b52037.ll
@@ -17,17 +17,17 @@ target triple = "nvptx64-nvidia-cuda"
 %float4 = type { float, float, float, float }
 %float3 = type { float, float, float }
 %int3 = type { i32, i32, i32 }
-%struct.spam.2 = type { %struct.foo.3, i16*, float, float, i32, float }
-%struct.foo.3 = type <{ %float4*, %float4*, %float4*, i32*, i32*, i32, i32, float }>
+%struct.spam.2 = type { %struct.foo.3, ptr, float, float, i32, float }
+%struct.foo.3 = type <{ ptr, ptr, ptr, ptr, ptr, i32, i32, float }>
 %struct.zot = type { %struct.bar, [8 x i8], %struct.foo, [12 x i8] }
-%struct.bar = type { i32 (...)** }
-%struct.foo = type <{ i16*, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
+%struct.bar = type { ptr }
+%struct.foo = type <{ ptr, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }>
 
 @global = external local_unnamed_addr addrspace(4) externally_initialized global [27 x %char3], align 1
-@global_1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8
+@global_1 = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 16 to ptr), ptr null, ptr null] }, align 8
 
 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
 
 declare %float4 @snork(float) local_unnamed_addr
 
@@ -39,63 +39,50 @@ declare %int3 @hoge(i32, i32, i32) local_unnamed_addr
 
 declare i64 @foo() local_unnamed_addr
 
-define void @barney(%struct.spam.2* nocapture readonly %arg) local_unnamed_addr {
+define void @barney(ptr nocapture readonly %arg) local_unnamed_addr {
 bb:
   tail call void asm sideeffect "// KEEP", ""() #1
   %tmp = alloca %struct.zot, align 16
-  %tmp4 = getelementptr inbounds %struct.spam.2, %struct.spam.2* %arg, i64 0, i32 1
-  %tmp5 = load i16*, i16** %tmp4, align 8
-  %tmp6 = bitcast %struct.zot* %tmp to i8*
-  %tmp9 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 1
-  %0 = bitcast %float4* %tmp9 to i16**
-  store i16* %tmp5, i16** %0, align 8
-  %tmp10 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 0, i32 0
-  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global_1, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %tmp10, align 16
-  %tmp34 = getelementptr %struct.spam.2, %struct.spam.2* %arg, i64 0, i32 0, i32 0
+  %tmp4 = getelementptr inbounds %struct.spam.2, ptr %arg, i64 0, i32 1
+  %tmp5 = load ptr, ptr %tmp4, align 8
+  %tmp9 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 1
+  store ptr %tmp5, ptr %tmp9, align 8
+  store ptr getelementptr inbounds ({ [3 x ptr] }, ptr @global_1, i64 0, inrange i32 0, i64 3), ptr %tmp, align 16
   %tmp.i1 = tail call i64 @foo()
-  %tmp44.i16 = getelementptr inbounds i16, i16* %tmp5, i64 undef
-  %tmp45.i17 = load i16, i16* %tmp44.i16, align 2
+  %tmp44.i16 = getelementptr inbounds i16, ptr %tmp5, i64 undef
+  %tmp45.i17 = load i16, ptr %tmp44.i16, align 2
   %tmp47.i18 = icmp eq i16 %tmp45.i17, -1
   br i1 %tmp47.i18, label %bb14, label %bb49.i.lr.ph
 
 bb49.i.lr.ph:                                     ; preds = %bb
-  %tmp16 = bitcast %struct.zot* %tmp to i8**
-  %tmp7.i6 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
+  %tmp7.i6 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
   %extract = lshr i16 %tmp45.i17, 11
   %extract.t = trunc i16 %extract to i8
-  %1 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
-  %tmp58.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 1, i32 2
-  %tmp59.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 2
-  %tmp62.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 2, i32 2
-  %2 = getelementptr inbounds %struct.foo, %struct.foo* %1, i64 1
-  %3 = bitcast %struct.foo* %2 to i8*
-  %tmp64.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 10
-  %tmp19.i.i = load float, float* %tmp58.i, align 16
-  %tmp23.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 2
-  %4 = bitcast %int3* %tmp23.i.i to float*
-  %tmp24.i.i = load float, float* %4, align 8
-  %5 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 5, i64 0
-  %6 = bitcast i8* %5 to float*
-  %.repack3.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 6
-  %7 = bitcast i64* %.repack3.i to float*
-  %tmp41.i.i = load i32, i32* %tmp62.i, align 16
-  %tmp48.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 3
-  %tmp49.i.i = load i32, i32* %tmp48.i.i, align 4
-  %tmp54.i.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4
-  %8 = bitcast %float3* %tmp54.i.i to i32*
-  %tmp55.i.i = load i32, i32* %8, align 8
-  %tmp9.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 7
-  %9 = bitcast i32* %tmp9.i to i64*
-  %tmp40.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 1
-  %10 = bitcast float* %tmp40.i to i32*
-  %tmp41.i = load i32, i32* %10, align 4
+  %0 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+  %tmp58.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 1, i32 2
+  %tmp59.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 2
+  %tmp62.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 2, i32 2
+  %1 = getelementptr inbounds %struct.foo, ptr %0, i64 1
+  %tmp64.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 10
+  %tmp19.i.i = load float, ptr %tmp58.i, align 16
+  %tmp23.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 2
+  %tmp24.i.i = load float, ptr %tmp23.i.i, align 8
+  %2 = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 5, i64 0
+  %.repack3.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 6
+  %tmp41.i.i = load i32, ptr %tmp62.i, align 16
+  %tmp48.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 3
+  %tmp49.i.i = load i32, ptr %tmp48.i.i, align 4
+  %tmp54.i.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4
+  %tmp55.i.i = load i32, ptr %tmp54.i.i, align 8
+  %tmp9.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 7
+  %tmp40.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 1
+  %tmp41.i = load i32, ptr %tmp40.i, align 4
   %tmp42.i = zext i32 %tmp41.i to i64
-  %tmp7.i = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
-  %tmp17.pre = load i8*, i8** %tmp16, align 16
-  %tmp60.i.peel = bitcast %struct.foo* %tmp7.i6 to i32**
-  %tmp61.i.peel = load i32*, i32** %tmp60.i.peel, align 16
+  %tmp7.i = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+  %tmp17.pre = load ptr, ptr %tmp, align 16
+  %tmp61.i.peel = load ptr, ptr %tmp7.i6, align 16
   %tmp10.i.i.peel = add nsw i8 %extract.t, -1
-  store i8 %tmp10.i.i.peel, i8* %3, align 4
+  store i8 %tmp10.i.i.peel, ptr %1, align 4
   %tmp13.i.i.peel = tail call %float3 @zot() #1
   %tmp15.i.i.peel = extractvalue %float3 %tmp13.i.i.peel, 0
   %tmp22.i.i.peel = fsub contract float %tmp19.i.i, %tmp15.i.i.peel
@@ -103,70 +90,64 @@ bb49.i.lr.ph:                                     ; preds = %bb
   %tmp27.i.i.peel = fsub contract float %tmp24.i.i, %tmp17.i.i.peel
   %tmp28.i.i.peel = tail call %float3 @bar_2(float %tmp22.i.i.peel, float %tmp27.i.i.peel) #1
   %tmp28.i.elt.i.peel = extractvalue %float3 %tmp28.i.i.peel, 0
-  store float %tmp28.i.elt.i.peel, float* %tmp59.i, align 16
+  store float %tmp28.i.elt.i.peel, ptr %tmp59.i, align 16
   %tmp28.i.elt2.i.peel = extractvalue %float3 %tmp28.i.i.peel, 1
-  store float %tmp28.i.elt2.i.peel, float* %6, align 4
+  store float %tmp28.i.elt2.i.peel, ptr %2, align 4
   %tmp28.i.elt4.i.peel = extractvalue %float3 %tmp28.i.i.peel, 2
-  store float %tmp28.i.elt4.i.peel, float* %7, align 8
+  store float %tmp28.i.elt4.i.peel, ptr %.repack3.i, align 8
   %tmp38.i.i.peel = zext i8 %tmp10.i.i.peel to i64
-  %tmp39.i5.i.peel = getelementptr inbounds [27 x %char3], [27 x %char3] addrspace(4)* @global, i64 0, i64 %tmp38.i.i.peel
-  %tmp39.i.i.peel = addrspacecast %char3 addrspace(4)* %tmp39.i5.i.peel to %char3*
-  %tmp42.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 0
-  %tmp43.i.i.peel = load i8, i8* %tmp42.i.i.peel, align 1
+  %tmp39.i5.i.peel = getelementptr inbounds [27 x %char3], ptr addrspace(4) @global, i64 0, i64 %tmp38.i.i.peel
+  %tmp39.i.i.peel = addrspacecast ptr addrspace(4) %tmp39.i5.i.peel to ptr
+  %tmp43.i.i.peel = load i8, ptr %tmp39.i.i.peel, align 1
   %tmp44.i.i.peel = sext i8 %tmp43.i.i.peel to i32
   %tmp45.i.i.peel = add nsw i32 %tmp41.i.i, %tmp44.i.i.peel
-  %tmp50.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 1
-  %tmp51.i.i.peel = load i8, i8* %tmp50.i.i.peel, align 1
+  %tmp50.i.i.peel = getelementptr inbounds %char3, ptr %tmp39.i.i.peel, i64 0, i32 1
+  %tmp51.i.i.peel = load i8, ptr %tmp50.i.i.peel, align 1
   %tmp52.i.i.peel = sext i8 %tmp51.i.i.peel to i32
   %tmp53.i.i.peel = add nsw i32 %tmp49.i.i, %tmp52.i.i.peel
-  %tmp56.i.i.peel = getelementptr inbounds %char3, %char3* %tmp39.i.i.peel, i64 0, i32 2
-  %tmp57.i.i.peel = load i8, i8* %tmp56.i.i.peel, align 1
+  %tmp56.i.i.peel = getelementptr inbounds %char3, ptr %tmp39.i.i.peel, i64 0, i32 2
+  %tmp57.i.i.peel = load i8, ptr %tmp56.i.i.peel, align 1
   %tmp58.i.i.peel = sext i8 %tmp57.i.i.peel to i32
   %tmp59.i.i.peel = add nsw i32 %tmp55.i.i, %tmp58.i.i.peel
   %tmp60.i.i.peel = tail call %int3 @hoge(i32 %tmp45.i.i.peel, i32 %tmp53.i.i.peel, i32 %tmp59.i.i.peel) #1
-  %tmp61.i.i.peel = getelementptr inbounds i32, i32* %tmp61.i.peel, i64 undef
-  %tmp62.i.i.peel = load i32, i32* %tmp61.i.i.peel, align 4
-  store i32 %tmp62.i.i.peel, i32* %tmp64.i, align 8
-  %tmp22.peel = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2
-  %11 = bitcast %struct.foo* %tmp22.peel to i8*
-  %tmp24.peel = getelementptr inbounds i8, i8* %11, i64 80
-  %12 = bitcast i8* %tmp24.peel to i32*
-  %tmp25.peel = load i32, i32* %12, align 16
-  %tmp36.peel = load %float4*, %float4** %tmp34, align 8
+  %tmp61.i.i.peel = getelementptr inbounds i32, ptr %tmp61.i.peel, i64 undef
+  %tmp62.i.i.peel = load i32, ptr %tmp61.i.i.peel, align 4
+  store i32 %tmp62.i.i.peel, ptr %tmp64.i, align 8
+  %tmp22.peel = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2
+  %tmp24.peel = getelementptr inbounds i8, ptr %tmp22.peel, i64 80
+  %tmp25.peel = load i32, ptr %tmp24.peel, align 16
+  %tmp36.peel = load ptr, ptr %arg, align 8
   %tmp37.peel = zext i32 %tmp25.peel to i64
-  %tmp38.peel = getelementptr inbounds %float4, %float4* %tmp36.peel, i64 %tmp37.peel
-  %tmp39.peel = bitcast %float4* %tmp38.peel to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %tmp39.peel, i64 undef, i1 false)
-  %tmp40.peel = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 2, i32 4, i32 2
-  %tmp41.peel25 = getelementptr inbounds float, float* %tmp40.peel, i64 2
-  %tmp42.peel = load float, float* %tmp41.peel25, align 8
-  %tmp44.peel = load float, float* inttoptr (i64 8 to float*), align 8
+  %tmp38.peel = getelementptr inbounds %float4, ptr %tmp36.peel, i64 %tmp37.peel
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 undef, ptr align 1 %tmp38.peel, i64 undef, i1 false)
+  %tmp40.peel = getelementptr inbounds %struct.zot, ptr %tmp, i64 0, i32 2, i32 4, i32 2
+  %tmp41.peel25 = getelementptr inbounds float, ptr %tmp40.peel, i64 2
+  %tmp42.peel = load float, ptr %tmp41.peel25, align 8
+  %tmp44.peel = load float, ptr inttoptr (i64 8 to ptr), align 8
   %tmp45.peel = fsub contract float %tmp42.peel, %tmp44.peel
   %tmp46.peel = tail call %float4 @snork(float %tmp45.peel)
   %tmp.i.peel = tail call i64 @foo()
-  %tmp10.i.peel = load i64, i64* %9, align 16
+  %tmp10.i.peel = load i64, ptr %tmp9.i, align 16
   %tmp11.i.peel = add i64 %tmp10.i.peel, %tmp.i.peel
-  store i64 %tmp11.i.peel, i64* %9, align 16, !tbaa !1
+  store i64 %tmp11.i.peel, ptr %tmp9.i, align 16, !tbaa !1
   %tmp43.i.peel = add i64 %tmp11.i.peel, %tmp42.i
-  %tmp44.i.peel = getelementptr inbounds i16, i16* %tmp5, i64 %tmp43.i.peel
-  %tmp45.i.peel = load i16, i16* %tmp44.i.peel, align 2
+  %tmp44.i.peel = getelementptr inbounds i16, ptr %tmp5, i64 %tmp43.i.peel
+  %tmp45.i.peel = load i16, ptr %tmp44.i.peel, align 2
   %tmp47.i.peel = icmp eq i16 %tmp45.i.peel, -1
   %extract21.peel = lshr i16 %tmp45.i.peel, 11
   %extract.t22.peel = trunc i16 %extract21.peel to i8
   br i1 %tmp47.i.peel, label %bb14, label %bb49.i.lr.ph.peel.newph
 
 bb49.i.lr.ph.peel.newph:                          ; preds = %bb49.i.lr.ph
-  %tmp60.i = bitcast %struct.foo* %tmp7.i to i32**
-  %tmp61.i = load i32*, i32** %tmp60.i, align 16
-  %tmp61.i.i = getelementptr inbounds i32, i32* %tmp61.i, i64 undef
-  %tmp18 = getelementptr i8, i8* %tmp17.pre, i64 -24
-  %tmp19 = bitcast i8* %tmp18 to i64*
+  %tmp61.i = load ptr, ptr %tmp7.i, align 16
+  %tmp61.i.i = getelementptr inbounds i32, ptr %tmp61.i, i64 undef
+  %tmp18 = getelementptr i8, ptr %tmp17.pre, i64 -24
   br label %bb49.i
 
 bb49.i:                                           ; preds = %bb49.i, %bb49.i.lr.ph.peel.newph
   %tmp45.i20.off11 = phi i8 [ %extract.t22.peel, %bb49.i.lr.ph.peel.newph ], [ %extract.t22, %bb49.i ]
   %tmp10.i.i = add nsw i8 %tmp45.i20.off11, -1
-  store i8 %tmp10.i.i, i8* %3, align 4
+  store i8 %tmp10.i.i, ptr %1, align 4
   %tmp13.i.i = tail call %float3 @zot() #1
   %tmp15.i.i = extractvalue %float3 %tmp13.i.i, 0
   %tmp22.i.i = fsub contract float %tmp19.i.i, %tmp15.i.i
@@ -174,53 +155,49 @@ bb49.i:                                           ; preds = %bb49.i, %bb49.i.lr.
   %tmp27.i.i = fsub contract float %tmp24.i.i, %tmp17.i.i
   %tmp28.i.i = tail call %float3 @bar_2(float %tmp22.i.i, float %tmp27.i.i) #1
   %tmp28.i.elt.i = extractvalue %float3 %tmp28.i.i, 0
-  store float %tmp28.i.elt.i, float* %tmp59.i, align 16
+  store float %tmp28.i.elt.i, ptr %tmp59.i, align 16
   %tmp28.i.elt2.i = extractvalue %float3 %tmp28.i.i, 1
-  store float %tmp28.i.elt2.i, float* %6, align 4
+  store float %tmp28.i.elt2.i, ptr %2, align 4
   %tmp28.i.elt4.i = extractvalue %float3 %tmp28.i.i, 2
-  store float %tmp28.i.elt4.i, float* %7, align 8
+  store float %tmp28.i.elt4.i, ptr %.repack3.i, align 8
   %tmp38.i.i = zext i8 %tmp10.i.i to i64
-  %tmp39.i5.i = getelementptr inbounds [27 x %char3], [27 x %char3] addrspace(4)* @global, i64 0, i64 %tmp38.i.i
-  %tmp39.i.i = addrspacecast %char3 addrspace(4)* %tmp39.i5.i to %char3*
-  %tmp42.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 0
-  %tmp43.i.i = load i8, i8* %tmp42.i.i, align 1
+  %tmp39.i5.i = getelementptr inbounds [27 x %char3], ptr addrspace(4) @global, i64 0, i64 %tmp38.i.i
+  %tmp39.i.i = addrspacecast ptr addrspace(4) %tmp39.i5.i to ptr
+  %tmp43.i.i = load i8, ptr %tmp39.i.i, align 1
   %tmp44.i.i = sext i8 %tmp43.i.i to i32
   %tmp45.i.i = add nsw i32 %tmp41.i.i, %tmp44.i.i
-  %tmp50.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 1
-  %tmp51.i.i = load i8, i8* %tmp50.i.i, align 1
+  %tmp50.i.i = getelementptr inbounds %char3, ptr %tmp39.i.i, i64 0, i32 1
+  %tmp51.i.i = load i8, ptr %tmp50.i.i, align 1
   %tmp52.i.i = sext i8 %tmp51.i.i to i32
   %tmp53.i.i = add nsw i32 %tmp49.i.i, %tmp52.i.i
-  %tmp56.i.i = getelementptr inbounds %char3, %char3* %tmp39.i.i, i64 0, i32 2
-  %tmp57.i.i = load i8, i8* %tmp56.i.i, align 1
+  %tmp56.i.i = getelementptr inbounds %char3, ptr %tmp39.i.i, i64 0, i32 2
+  %tmp57.i.i = load i8, ptr %tmp56.i.i, align 1
   %tmp58.i.i = sext i8 %tmp57.i.i to i32
   %tmp59.i.i = add nsw i32 %tmp55.i.i, %tmp58.i.i
   %tmp60.i.i = tail call %int3 @hoge(i32 %tmp45.i.i, i32 %tmp53.i.i, i32 %tmp59.i.i) #1
-  %tmp62.i.i = load i32, i32* %tmp61.i.i, align 4
-  store i32 %tmp62.i.i, i32* %tmp64.i, align 8
-  %tmp20 = load i64, i64* %tmp19, align 8
-  %tmp22 = getelementptr inbounds i8, i8* %tmp6, i64 %tmp20
-  %tmp24 = getelementptr inbounds i8, i8* %tmp22, i64 80
-  %13 = bitcast i8* %tmp24 to i32*
-  %tmp25 = load i32, i32* %13, align 4
-  %tmp36 = load %float4*, %float4** %tmp34, align 8
+  %tmp62.i.i = load i32, ptr %tmp61.i.i, align 4
+  store i32 %tmp62.i.i, ptr %tmp64.i, align 8
+  %tmp20 = load i64, ptr %tmp18, align 8
+  %tmp22 = getelementptr inbounds i8, ptr %tmp, i64 %tmp20
+  %tmp24 = getelementptr inbounds i8, ptr %tmp22, i64 80
+  %tmp25 = load i32, ptr %tmp24, align 4
+  %tmp36 = load ptr, ptr %arg, align 8
   %tmp37 = zext i32 %tmp25 to i64
-  %tmp38 = getelementptr inbounds %float4, %float4* %tmp36, i64 %tmp37
-  %tmp39 = bitcast %float4* %tmp38 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 1 %tmp39, i64 undef, i1 false)
-  %tmp40 = getelementptr inbounds i8, i8* %tmp22, i64 48
-  %tmp41 = getelementptr inbounds i8, i8* %tmp40, i64 8
-  %14 = bitcast i8* %tmp41 to float*
-  %tmp42 = load float, float* %14, align 4
-  %tmp44 = load float, float* inttoptr (i64 8 to float*), align 8
+  %tmp38 = getelementptr inbounds %float4, ptr %tmp36, i64 %tmp37
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 undef, ptr align 1 %tmp38, i64 undef, i1 false)
+  %tmp40 = getelementptr inbounds i8, ptr %tmp22, i64 48
+  %tmp41 = getelementptr inbounds i8, ptr %tmp40, i64 8
+  %tmp42 = load float, ptr %tmp41, align 4
+  %tmp44 = load float, ptr inttoptr (i64 8 to ptr), align 8
   %tmp45 = fsub contract float %tmp42, %tmp44
   %tmp46 = tail call %float4 @snork(float %tmp45)
   %tmp.i = tail call i64 @foo()
-  %tmp10.i = load i64, i64* %9, align 16
+  %tmp10.i = load i64, ptr %tmp9.i, align 16
   %tmp11.i = add i64 %tmp10.i, %tmp.i
-  store i64 %tmp11.i, i64* %9, align 16, !tbaa !1
+  store i64 %tmp11.i, ptr %tmp9.i, align 16, !tbaa !1
   %tmp43.i = add i64 %tmp11.i, %tmp42.i
-  %tmp44.i = getelementptr inbounds i16, i16* %tmp5, i64 %tmp43.i
-  %tmp45.i = load i16, i16* %tmp44.i, align 2
+  %tmp44.i = getelementptr inbounds i16, ptr %tmp5, i64 %tmp43.i
+  %tmp45.i = load i16, ptr %tmp44.i, align 2
   %tmp47.i = icmp eq i16 %tmp45.i, -1
   %extract21 = lshr i16 %tmp45.i, 11
   %extract.t22 = trunc i16 %extract21 to i8
@@ -235,7 +212,7 @@ attributes #1 = { nounwind }
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (%struct.spam.2*)* @barney, !"kernel", i32 1}
+!0 = !{ptr @barney, !"kernel", i32 1}
 !1 = !{!2, !11, i64 64}
 !2 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !3, i64 0, !3, i64 8, !6, i64 16, !8, i64 32, !9, i64 44, !10, i64 48, !11, i64 64, !9, i64 72, !4, i64 76, !9, i64 80}
 !3 = !{!"any pointer", !4, i64 0}
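Most of the churn in this file is bitcast chains folding away: intermediates like the old %tmp6 disappear entirely, because the element type now lives on the getelementptr itself rather than on the pointer. In miniature (the type and function names are illustrative):

%pair = type { i32, float }

define float @second_field(ptr %p) {
  ; the source element type is an operand of the gep, not part of ptr
  %f = getelementptr inbounds %pair, ptr %p, i64 0, i32 1
  %v = load float, ptr %f
  ret float %v
}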

diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll
index 2321986000c27..80113f510a05a 100644
--- a/llvm/test/CodeGen/NVPTX/bf16.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16.ll
@@ -5,31 +5,31 @@
 @"bfloat_array" = addrspace(1) constant [4 x bfloat]
                 [bfloat 0xR0201, bfloat 0xR0403, bfloat 0xR0605, bfloat 0xR0807]
 
-define void @test_load_store(bfloat addrspace(1)* %in, bfloat addrspace(1)* %out) {
+define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_load_store
 ; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load bfloat, bfloat addrspace(1)* %in
-  store bfloat %val, bfloat addrspace(1) * %out
+  %val = load bfloat, ptr addrspace(1) %in
+  store bfloat %val, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_bitcast_from_bfloat(bfloat addrspace(1)* %in, i16 addrspace(1)* %out) {
+define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_bitcast_from_bfloat
 ; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load bfloat, bfloat addrspace(1) * %in
+  %val = load bfloat, ptr addrspace(1) %in
   %val_int = bitcast bfloat %val to i16
-  store i16 %val_int, i16 addrspace(1)* %out
+  store i16 %val_int, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_bitcast_to_bfloat(bfloat addrspace(1)* %out, i16 addrspace(1)* %in) {
+define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; CHECK-LABEL: @test_bitcast_to_bfloat
 ; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load i16, i16 addrspace(1)* %in
+  %val = load i16, ptr addrspace(1) %in
   %val_fp = bitcast i16 %val to bfloat
-  store bfloat %val_fp, bfloat addrspace(1)* %out
+  store bfloat %val_fp, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.ll b/llvm/test/CodeGen/NVPTX/branch-fold.ll
index 72e3d020a9015..a1595d1d8da8e 100644
--- a/llvm/test/CodeGen/NVPTX/branch-fold.ll
+++ b/llvm/test/CodeGen/NVPTX/branch-fold.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
-define void @foo(i32 %x, float* %output) {
+define void @foo(i32 %x, ptr %output) {
 ; CHECK-LABEL: .visible .func foo(
 ; CHECK-NOT: bra.uni
 ; CHECK-NOT: LBB0_
@@ -21,7 +21,7 @@ else:
   br label %merge
 
 merge:
-  store float 2.0, float* %output
+  store float 2.0, ptr %output
   ret void
 }
 

diff --git a/llvm/test/CodeGen/NVPTX/bug17709.ll b/llvm/test/CodeGen/NVPTX/bug17709.ll
index cc5c11bde19c0..94ea3f502d241 100644
--- a/llvm/test/CodeGen/NVPTX/bug17709.ll
+++ b/llvm/test/CodeGen/NVPTX/bug17709.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
-define private ptx_device { double, double } @__utils1_MOD_trace(%"struct.array2_complex(kind=8).43.5.57"* noalias %m) {
+define private ptx_device { double, double } @__utils1_MOD_trace(ptr noalias %m) {
 entry:
   ;unreachable
   %t0 = insertvalue {double, double} undef, double 1.0, 0
@@ -14,14 +14,14 @@ entry:
 }
 
 %struct.descriptor_dimension.0.52 = type { i64, i64, i64 }
-%"struct.array2_complex(kind=8).37.18.70" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
-%"struct.array2_complex(kind=8).43.5.57" = type { i8*, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).37.18.70" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
+%"struct.array2_complex(kind=8).43.5.57" = type { ptr, i64, i64, [2 x %struct.descriptor_dimension.0.52] }
 @replacementOfAlloca8 = private global %"struct.array2_complex(kind=8).37.18.70" zeroinitializer, align 4096
 
 ; CHECK: .visible .entry __kernelgen_main
-define ptx_kernel void @__kernelgen_main(i32* nocapture %args, i32*) {
+define ptx_kernel void @__kernelgen_main(ptr nocapture %args, ptr) {
 entry:
-  %1 = tail call ptx_device { double, double } bitcast ({ double, double } (%"struct.array2_complex(kind=8).43.5.57"*)* @__utils1_MOD_trace to { double, double } (%"struct.array2_complex(kind=8).37.18.70"*)*)(%"struct.array2_complex(kind=8).37.18.70"* noalias @replacementOfAlloca8)
+  %1 = tail call ptx_device { double, double } @__utils1_MOD_trace(ptr noalias @replacementOfAlloca8)
   ret void
 }
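With a single pointer type, the constant-expression bitcast that used to reconcile the two structurally identical descriptor struct types is no longer needed, and the call becomes direct. Reduced to its essence (names are illustrative):

declare { double, double } @trace_fn(ptr noalias)

define void @caller(ptr %m) {
  ; both struct pointer types are now just ptr, so no function cast is needed
  %r = tail call { double, double } @trace_fn(ptr noalias %m)
  ret void
}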
 

diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll
index 5a8a9518bc510..6c7e152212046 100644
--- a/llvm/test/CodeGen/NVPTX/bug21465.ll
+++ b/llvm/test/CodeGen/NVPTX/bug21465.ll
@@ -8,16 +8,16 @@ target triple = "nvptx64-unknown-unknown"
 %struct.S = type { i32, i32 }
 
 ; Function Attrs: nounwind
-define void @_Z11TakesStruct1SPi(%struct.S* byval(%struct.S) nocapture readonly %input, i32* nocapture %output) #0 {
+define void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 {
 entry:
 ; CHECK-LABEL: @_Z11TakesStruct1SPi
 ; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi(
-; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)*
-  %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
-  %0 = load i32, i32* %b, align 4
+; CHECK: addrspacecast ptr %input to ptr addrspace(101)
+  %b = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+  %0 = load i32, ptr %b, align 4
 ; PTX-NOT: ld.param.u32 {{%r[0-9]+}}, [{{%rd[0-9]+}}]
 ; PTX: ld.param.u32 [[value:%r[0-9]+]], [_Z11TakesStruct1SPi_param_0+4]
-  store i32 %0, i32* %output, align 4
+  store i32 %0, ptr %output, align 4
 ; PTX-NEXT: st.global.u32 [{{%rd[0-9]+}}], [[value]]
   ret void
 }
@@ -26,4 +26,4 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, !"kernel", i32 1}
+!0 = !{ptr @_Z11TakesStruct1SPi, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/bug22246.ll b/llvm/test/CodeGen/NVPTX/bug22246.ll
index cda3e7743827c..be689861ab5b7 100644
--- a/llvm/test/CodeGen/NVPTX/bug22246.ll
+++ b/llvm/test/CodeGen/NVPTX/bug22246.ll
@@ -5,11 +5,11 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 ; CHECK-LABEL: _Z3foobbbPb
-define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, ptr nocapture %output) {
 entry:
 ; CHECK: selp.b32       %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
   %.sink.v = select i1 %p1, i1 %p2, i1 %p3
   %frombool5 = zext i1 %.sink.v to i8
-  store i8 %frombool5, i8* %output, align 1
+  store i8 %frombool5, ptr %output, align 1
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll
index ddf4c9fce1451..bb4a7c0ae5a81 100644
--- a/llvm/test/CodeGen/NVPTX/bug22322.ll
+++ b/llvm/test/CodeGen/NVPTX/bug22322.ll
@@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; Function Attrs: nounwind
 ; CHECK-LABEL: some_kernel
-define void @some_kernel(%class.float3* nocapture %dst) #0 {
+define void @some_kernel(ptr nocapture %dst) #0 {
 _ZL11compute_vecRK6float3jb.exit:
   %ret_vec.sroa.8.i = alloca float, align 4
   %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
@@ -17,23 +17,22 @@ _ZL11compute_vecRK6float3jb.exit:
   %3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
   %4 = add nsw i32 %2, %3
   %5 = zext i32 %4 to i64
-  %6 = bitcast float* %ret_vec.sroa.8.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* %6)
-  %7 = and i32 %4, 15
-  %8 = icmp eq i32 %7, 0
-  %9 = select i1 %8, float 0.000000e+00, float -1.000000e+00
-  store float %9, float* %ret_vec.sroa.8.i, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr %ret_vec.sroa.8.i)
+  %6 = and i32 %4, 15
+  %7 = icmp eq i32 %6, 0
+  %8 = select i1 %7, float 0.000000e+00, float -1.000000e+00
+  store float %8, ptr %ret_vec.sroa.8.i, align 4
 ; CHECK: max.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
-  %10 = fcmp olt float %9, 0.000000e+00
-  %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
-  %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* %6)
-  %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
-  store float 0.000000e+00, float* %12, align 4
-  %13 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 1
-  store float %11, float* %13, align 4
-  %14 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 2
-  store float 0.000000e+00, float* %14, align 4
+  %9 = fcmp olt float %8, 0.000000e+00
+  %ret_vec.sroa.8.i.val = load float, ptr %ret_vec.sroa.8.i, align 4
+  %10 = select i1 %9, float 0.000000e+00, float %ret_vec.sroa.8.i.val
+  call void @llvm.lifetime.end.p0(i64 4, ptr %ret_vec.sroa.8.i)
+  %11 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 0
+  store float 0.000000e+00, ptr %11, align 4
+  %12 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 1
+  store float %10, ptr %12, align 4
+  %13 = getelementptr inbounds %class.float3, ptr %dst, i64 %5, i32 2
+  store float 0.000000e+00, ptr %13, align 4
   ret void
 }
 
@@ -47,10 +46,10 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
 
 ; Function Attrs: nounwind
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #2
 
 ; Function Attrs: nounwind
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "no-signed-zeros-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -59,5 +58,5 @@ attributes #2 = { nounwind }
 !nvvm.annotations = !{!0}
 !llvm.ident = !{!1}
 
-!0 = !{void (%class.float3*)* @some_kernel, !"kernel", i32 1}
+!0 = !{ptr @some_kernel, !"kernel", i32 1}
 !1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}
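The lifetime intrinsics lose their pointee mangling the same way: llvm.lifetime.start.p0i8 becomes llvm.lifetime.start.p0, and the alloca no longer needs a bitcast to i8* first. A self-contained sketch:

define void @lifetime_demo() {
  %slot = alloca float, align 4
  call void @llvm.lifetime.start.p0(i64 4, ptr %slot)
  store float 0.000000e+00, ptr %slot
  call void @llvm.lifetime.end.p0(i64 4, ptr %slot)
  ret void
}

declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)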

diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
index 86925578c352e..80e5795134f21 100644
--- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll
+++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
@@ -11,20 +11,18 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 ; CHECK-LABEL: spam
-define ptx_kernel void @spam(i8 addrspace(1)* noalias nocapture readonly %arg, i8 addrspace(1)* noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
+define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
 bb:
-  %tmp = bitcast i8 addrspace(1)* %arg to i16 addrspace(1)*
-  %tmp4 = bitcast i8 addrspace(1)* %arg1 to i64 addrspace(1)*
   %tmp5 = add nsw i64 %arg3, 8
-  %tmp6 = getelementptr i16, i16 addrspace(1)* %tmp, i64 %tmp5
+  %tmp6 = getelementptr i16, ptr addrspace(1) %arg, i64 %tmp5
 ; CHECK: ld.global.nc.u16
-  %tmp7 = load i16, i16 addrspace(1)* %tmp6, align 2
+  %tmp7 = load i16, ptr addrspace(1) %tmp6, align 2
 ; CHECK: cvt.s32.s16
   %tmp8 = sext i16 %tmp7 to i64
   %tmp9 = mul nsw i64 %tmp8, %tmp8
-  %tmp10 = load i64, i64 addrspace(1)* %tmp4, align 8
+  %tmp10 = load i64, ptr addrspace(1) %arg1, align 8
   %tmp11 = add nsw i64 %tmp9, %tmp10
-  store i64 %tmp11, i64 addrspace(1)* %tmp4, align 8
+  store i64 %tmp11, ptr addrspace(1) %arg1, align 8
   ret void
 }
 
@@ -32,4 +30,4 @@ attributes #0 = { norecurse nounwind "polly.skip.fn" }
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64, i64)* @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
+!0 = !{ptr @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}

diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll
index ec10ba7ef5550..22cf2189aae39 100644
--- a/llvm/test/CodeGen/NVPTX/bug26185.ll
+++ b/llvm/test/CodeGen/NVPTX/bug26185.ll
@@ -8,51 +8,51 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-unknown-unknown"
 
 ; CHECK-LABEL: ex_zext
-define void @ex_zext(i8* noalias readonly %data, i32* %res) {
+define void @ex_zext(ptr noalias readonly %data, ptr %res) {
 entry:
 ; CHECK: ld.global.nc.u8
-  %val = load i8, i8* %data
+  %val = load i8, ptr %data
 ; CHECK: cvt.u32.u8
   %valext = zext i8 %val to i32
-  store i32 %valext, i32* %res
+  store i32 %valext, ptr %res
   ret void
 }
 
 ; CHECK-LABEL: ex_sext
-define void @ex_sext(i8* noalias readonly %data, i32* %res) {
+define void @ex_sext(ptr noalias readonly %data, ptr %res) {
 entry:
 ; CHECK: ld.global.nc.u8
-  %val = load i8, i8* %data
+  %val = load i8, ptr %data
 ; CHECK: cvt.s32.s8
   %valext = sext i8 %val to i32
-  store i32 %valext, i32* %res
+  store i32 %valext, ptr %res
   ret void
 }
 
 ; CHECK-LABEL: ex_zext_v2
-define void @ex_zext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+define void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
 entry:
 ; CHECK: ld.global.nc.v2.u8
-  %val = load <2 x i8>, <2 x i8>* %data
+  %val = load <2 x i8>, ptr %data
 ; CHECK: cvt.u32.u16
   %valext = zext <2 x i8> %val to <2 x i32>
-  store <2 x i32> %valext, <2 x i32>* %res
+  store <2 x i32> %valext, ptr %res
   ret void
 }
 
 ; CHECK-LABEL: ex_sext_v2
-define void @ex_sext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+define void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
 entry:
 ; CHECK: ld.global.nc.v2.u8
-  %val = load <2 x i8>, <2 x i8>* %data
+  %val = load <2 x i8>, ptr %data
 ; CHECK: cvt.s32.s8
   %valext = sext <2 x i8> %val to <2 x i32>
-  store <2 x i32> %valext, <2 x i32>* %res
+  store <2 x i32> %valext, ptr %res
   ret void
 }
 
 !nvvm.annotations = !{!0,!1,!2,!3}
-!0 = !{void (i8*, i32*)* @ex_zext, !"kernel", i32 1}
-!1 = !{void (i8*, i32*)* @ex_sext, !"kernel", i32 1}
-!2 = !{void (<2 x i8>*, <2 x i32>*)* @ex_zext_v2, !"kernel", i32 1}
-!3 = !{void (<2 x i8>*, <2 x i32>*)* @ex_sext_v2, !"kernel", i32 1}
+!0 = !{ptr @ex_zext, !"kernel", i32 1}
+!1 = !{ptr @ex_sext, !"kernel", i32 1}
+!2 = !{ptr @ex_zext_v2, !"kernel", i32 1}
+!3 = !{ptr @ex_sext_v2, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/bug41651.ll b/llvm/test/CodeGen/NVPTX/bug41651.ll
index b093ab7b2c2a8..23bd59004c5bc 100644
--- a/llvm/test/CodeGen/NVPTX/bug41651.ll
+++ b/llvm/test/CodeGen/NVPTX/bug41651.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
-%func = type { i32 (i32, i32)** }
+%func = type { ptr }
 
 ; CHECK: foo
 ; CHECK: call

diff --git a/llvm/test/CodeGen/NVPTX/bug52623.ll b/llvm/test/CodeGen/NVPTX/bug52623.ll
index eb352d7d5fb52..4359a662ef44e 100644
--- a/llvm/test/CodeGen/NVPTX/bug52623.ll
+++ b/llvm/test/CodeGen/NVPTX/bug52623.ll
@@ -7,15 +7,14 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
-%printf_args.0.8 = type { i8* }
+%printf_args.0.8 = type { ptr }
 
 define internal i32 @__kmpc_get_hardware_thread_id_in_block(i1 %0) {
   %2 = alloca %printf_args.0.8, i32 0, align 8
-  %3 = bitcast %printf_args.0.8* %2 to i8*
   br i1 true, label %._crit_edge1, label %._crit_edge
 
 ._crit_edge:                                      ; preds = %1, %._crit_edge
-  %4 = call i32 null(i8* null, i8* %3)
+  %3 = call i32 null(ptr null, ptr %2)
   br i1 %0, label %._crit_edge, label %._crit_edge1
 
 ._crit_edge1:                                     ; preds = %._crit_edge, %1

diff --git a/llvm/test/CodeGen/NVPTX/bypass-div.ll b/llvm/test/CodeGen/NVPTX/bypass-div.ll
index 3c8f5d0bc6c64..6cabdf7ff10d6 100644
--- a/llvm/test/CodeGen/NVPTX/bypass-div.ll
+++ b/llvm/test/CodeGen/NVPTX/bypass-div.ll
@@ -4,78 +4,78 @@
 ; 64-bit divides and rems should be split into a fast and slow path where
 ; the fast path uses a 32-bit operation.
 
-define void @sdiv64(i64 %a, i64 %b, i64* %retptr) {
+define void @sdiv64(i64 %a, i64 %b, ptr %retptr) {
 ; CHECK-LABEL: sdiv64(
 ; CHECK:        div.s64
 ; CHECK:        div.u32
 ; CHECK:        ret
   %d = sdiv i64 %a, %b
-  store i64 %d, i64* %retptr
+  store i64 %d, ptr %retptr
   ret void
 }
 
-define void @udiv64(i64 %a, i64 %b, i64* %retptr) {
+define void @udiv64(i64 %a, i64 %b, ptr %retptr) {
 ; CHECK-LABEL: udiv64(
 ; CHECK:        div.u64
 ; CHECK:        div.u32
 ; CHECK:        ret
   %d = udiv i64 %a, %b
-  store i64 %d, i64* %retptr
+  store i64 %d, ptr %retptr
   ret void
 }
 
-define void @srem64(i64 %a, i64 %b, i64* %retptr) {
+define void @srem64(i64 %a, i64 %b, ptr %retptr) {
 ; CHECK-LABEL: srem64(
 ; CHECK:        rem.s64
 ; CHECK:        rem.u32
 ; CHECK:        ret
   %d = srem i64 %a, %b
-  store i64 %d, i64* %retptr
+  store i64 %d, ptr %retptr
   ret void
 }
 
-define void @urem64(i64 %a, i64 %b, i64* %retptr) {
+define void @urem64(i64 %a, i64 %b, ptr %retptr) {
 ; CHECK-LABEL: urem64(
 ; CHECK:        rem.u64
 ; CHECK:        rem.u32
 ; CHECK:        ret
   %d = urem i64 %a, %b
-  store i64 %d, i64* %retptr
+  store i64 %d, ptr %retptr
   ret void
 }
 
-define void @sdiv32(i32 %a, i32 %b, i32* %retptr) {
+define void @sdiv32(i32 %a, i32 %b, ptr %retptr) {
 ; CHECK-LABEL: sdiv32(
 ; CHECK: div.s32
 ; CHECK-NOT: div.
   %d = sdiv i32 %a, %b
-  store i32 %d, i32* %retptr
+  store i32 %d, ptr %retptr
   ret void
 }
 
-define void @udiv32(i32 %a, i32 %b, i32* %retptr) {
+define void @udiv32(i32 %a, i32 %b, ptr %retptr) {
 ; CHECK-LABEL: udiv32(
 ; CHECK: div.u32
 ; CHECK-NOT: div.
   %d = udiv i32 %a, %b
-  store i32 %d, i32* %retptr
+  store i32 %d, ptr %retptr
   ret void
 }
 
-define void @srem32(i32 %a, i32 %b, i32* %retptr) {
+define void @srem32(i32 %a, i32 %b, ptr %retptr) {
 ; CHECK-LABEL: srem32(
 ; CHECK: rem.s32
 ; CHECK-NOT: rem.
   %d = srem i32 %a, %b
-  store i32 %d, i32* %retptr
+  store i32 %d, ptr %retptr
   ret void
 }
 
-define void @urem32(i32 %a, i32 %b, i32* %retptr) {
+define void @urem32(i32 %a, i32 %b, ptr %retptr) {
 ; CHECK-LABEL: urem32(
 ; CHECK: rem.u32
 ; CHECK-NOT: rem.
   %d = urem i32 %a, %b
-  store i32 %d, i32* %retptr
+  store i32 %d, ptr %retptr
   ret void
 }
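The split described in the comment at the top of this file roughly takes the following shape for the unsigned case: guard on the high bits of both operands, divide in 32 bits when they fit, and fall back to the full 64-bit operation otherwise. This is a sketch of the IR-level idea, not the backend's exact output (the signed case needs a different guard):

define i64 @udiv64_bypass_sketch(i64 %a, i64 %b) {
  %or = or i64 %a, %b
  %hi = lshr i64 %or, 32
  %small = icmp eq i64 %hi, 0
  br i1 %small, label %fast, label %slow

fast:                     ; both operands fit in 32 bits
  %a32 = trunc i64 %a to i32
  %b32 = trunc i64 %b to i32
  %q32 = udiv i32 %a32, %b32
  %qf = zext i32 %q32 to i64
  br label %done

slow:                     ; full-width divide
  %qs = udiv i64 %a, %b
  br label %done

done:
  %q = phi i64 [ %qf, %fast ], [ %qs, %slow ]
  ret i64 %q
}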

diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 882704918a49c..3fbed871850bc 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -4,19 +4,19 @@
 ; Checks how NVPTX lowers alloca buffers and their passing to functions.
 ;
 ; Produced with the following CUDA code:
-;  extern "C" __attribute__((device)) void callee(float* f, char* buf);
+;  extern "C" __attribute__((device)) void callee(ptr f, char* buf);
 ;
-;  extern "C" __attribute__((global)) void kernel_func(float* a) {
+;  extern "C" __attribute__((global)) void kernel_func(ptr a) {
 ;    char buf[4 * sizeof(float)];
-;    *(reinterpret_cast<float*>(&buf[0])) = a[0];
-;    *(reinterpret_cast<float*>(&buf[1])) = a[1];
-;    *(reinterpret_cast<float*>(&buf[2])) = a[2];
-;    *(reinterpret_cast<float*>(&buf[3])) = a[3];
+;    *(reinterpret_cast<ptr>(&buf[0])) = a[0];
+;    *(reinterpret_cast<ptr>(&buf[1])) = a[1];
+;    *(reinterpret_cast<ptr>(&buf[2])) = a[2];
+;    *(reinterpret_cast<ptr>(&buf[3])) = a[3];
 ;    callee(a, buf);
 ;  }
 
 ; CHECK: .visible .entry kernel_func
-define void @kernel_func(float* %a) {
+define void @kernel_func(ptr %a) {
 entry:
   %buf = alloca [16 x i8], align 4
 
@@ -29,24 +29,20 @@ entry:
 ; CHECK: ld.global.f32 %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
 ; CHECK: st.local.f32 [{{%rd[0-9]+}}], %f[[A0_REG]]
 
-  %0 = load float, float* %a, align 4
-  %1 = bitcast [16 x i8]* %buf to float*
-  store float %0, float* %1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
-  %2 = load float, float* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
-  %3 = bitcast i8* %arrayidx3 to float*
-  store float %2, float* %3, align 4
-  %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
-  %4 = load float, float* %arrayidx4, align 4
-  %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
-  %5 = bitcast i8* %arrayidx5 to float*
-  store float %4, float* %5, align 4
-  %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
-  %6 = load float, float* %arrayidx6, align 4
-  %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
-  %7 = bitcast i8* %arrayidx7 to float*
-  store float %6, float* %7, align 4
+  %0 = load float, ptr %a, align 4
+  store float %0, ptr %buf, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 1
+  %1 = load float, ptr %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 1
+  store float %1, ptr %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float, ptr %a, i64 2
+  %2 = load float, ptr %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 2
+  store float %2, ptr %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float, ptr %a, i64 3
+  %3 = load float, ptr %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3
+  store float %3, ptr %arrayidx7, align 4
 
 ; CHECK:        .param .b64 param0;
 ; CHECK-NEXT:   st.param.b64  [param0+0], %rd[[A_REG]]
@@ -55,13 +51,12 @@ entry:
 ; CHECK-NEXT:   call.uni
 ; CHECK-NEXT:   callee,
 
-  %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
-  call void @callee(float* %a, i8* %arraydecay) #2
+  call void @callee(ptr %a, ptr %buf) #2
   ret void
 }
 
-declare void @callee(float*, i8*)
+declare void @callee(ptr, ptr)
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (float*)* @kernel_func, !"kernel", i32 1}
+!0 = !{ptr @kernel_func, !"kernel", i32 1}
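The deleted %arraydecay GEP shows a nice side effect: an alloca of [16 x i8] already produces a plain ptr, so the array-to-pointer decay is a no-op under opaque pointers. Sketch (the callee name is made up):

declare void @use_buf(ptr, ptr)

define void @pass_alloca(ptr %a) {
  %buf = alloca [16 x i8], align 4
  ; no decay gep and no bitcast: the alloca result is already a ptr
  call void @use_buf(ptr %a, ptr %buf)
  ret void
}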

diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
index 3a04f239d66a1..7a0d78c5376e9 100644
--- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -20,26 +20,25 @@ target triple = "nvptx64-nvidia-cuda"
 ; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE,
 define weak_odr void @foo() {
 entry:
-  %call.i.i.i = tail call %"class.complex" bitcast (%complex_half ()* @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE to %"class.complex" (i32, i32, %"class.complex"*)*)(i32 0, i32 0, %"class.complex"* byval(%"class.complex") null)
+  %call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
   ret void
 }
 
 ;; Function pointers can escape, so we have to use a conservative
 ;; alignment for a function that has address taken.
 ;;
-declare i8* @usefp(i8* %fp)
+declare ptr @usefp(ptr %fp)
 ; CHECK: .func callee(
 ; CHECK-NEXT: .param .align 4 .b8 callee_param_0[4]
-define internal void @callee(%"class.complex"* byval(%"class.complex") %byval_arg) {
+define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
   ret void
 }
 define void @boom() {
-  %fp = call i8* @usefp(i8* bitcast (void (%"class.complex"*)* @callee to i8*))
-  %cast = bitcast i8* %fp to void (%"class.complex"*)*
+  %fp = call ptr @usefp(ptr @callee)
   ; CHECK: .param .align 4 .b8 param0[4];
   ; CHECK: st.param.v2.b16 [param0+0]
   ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
-  call void %cast(%"class.complex"* byval(%"class.complex") null)
+  call void %fp(ptr byval(%"class.complex") null)
   ret void
 }
 

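For context: with opaque pointers, a call through "bitcast @f to <other
function type>" folds into a plain direct call, and the (possibly
mismatched) prototype is carried by the call site rather than by the
pointer type. A minimal sketch of the same pattern, using hypothetical
names:

  %pair = type { i16, i16 }
  declare void @real_callee(i32)
  define void @mismatched_direct_call() {
    ; The prototype is spelled on the call instruction itself, so no
    ; constant bitcast of @real_callee is needed to change its type.
    call void @real_callee(ptr byval(%pair) null)
    ret void
  }
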
diff  --git a/llvm/test/CodeGen/NVPTX/callchain.ll b/llvm/test/CodeGen/NVPTX/callchain.ll
index a6f08f1f21be5..2d9eb738cc3bb 100644
--- a/llvm/test/CodeGen/NVPTX/callchain.ll
+++ b/llvm/test/CodeGen/NVPTX/callchain.ll
@@ -3,9 +3,8 @@
 
 target triple = "nvptx"
 
-define void @foo(i8* %ptr) {
-  %fnptr = bitcast i8* %ptr to void ()*
+define void @foo(ptr %ptr) {
 ; CHECK: prototype_0 : .callprototype ()_ ()
-  tail call void %fnptr()
+  tail call void %ptr()
   ret void
 }

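The same holds for indirect calls: the pointer no longer needs a cast to
a function-pointer type, because the call instruction supplies the
prototype. A minimal sketch with a hypothetical signature:

  define i32 @indirect(ptr %fn, i32 %x) {
    ; %fn is an untyped ptr; the call spells out the i32 (i32) prototype.
    %r = call i32 %fn(i32 %x)
    ret i32 %r
  }
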
diff  --git a/llvm/test/CodeGen/NVPTX/calling-conv.ll b/llvm/test/CodeGen/NVPTX/calling-conv.ll
index 962138b2c0bb2..7a5655a868185 100644
--- a/llvm/test/CodeGen/NVPTX/calling-conv.ll
+++ b/llvm/test/CodeGen/NVPTX/calling-conv.ll
@@ -7,21 +7,21 @@
 ;; Kernel function using ptx_kernel calling conv
 
 ; CHECK: .entry kernel_func
-define ptx_kernel void @kernel_func(float* %a) {
+define ptx_kernel void @kernel_func(ptr %a) {
 ; CHECK: ret
   ret void
 }
 
 ;; Device function
 ; CHECK: .func device_func
-define void @device_func(float* %a) {
+define void @device_func(ptr %a) {
 ; CHECK: ret
   ret void
 }
 
 ;; Kernel function using NVVM metadata
 ; CHECK: .entry metadata_kernel
-define void @metadata_kernel(float* %a) {
+define void @metadata_kernel(ptr %a) {
 ; CHECK: ret
   ret void
 }
@@ -29,4 +29,4 @@ define void @metadata_kernel(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = !{void (float*)* @metadata_kernel, !"kernel", i32 1}
+!1 = !{ptr @metadata_kernel, !"kernel", i32 1}

diff  --git a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
index 18142450490c7..5e85bf4554546 100644
--- a/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
+++ b/llvm/test/CodeGen/NVPTX/convergent-mir-call.ll
@@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda"
 declare void @conv() convergent
 declare void @not_conv()
 
-define void @test(void ()* %f) {
+define void @test(ptr %f) {
   ; CHECK: ConvergentCallUniPrintCall
   ; CHECK-NEXT: @conv
   call void @conv()

diff  --git a/llvm/test/CodeGen/NVPTX/ctlz.ll b/llvm/test/CodeGen/NVPTX/ctlz.ll
index 504eb0be8804c..0fbe45b466f69 100644
--- a/llvm/test/CodeGen/NVPTX/ctlz.ll
+++ b/llvm/test/CodeGen/NVPTX/ctlz.ll
@@ -106,7 +106,7 @@ define i16 @myctlz_ret16_2(i16 %a) {
 ; Here we store the result of ctlz.16 into an i16 pointer, so the trunc should
 ; remain.
 ; CHECK-LABEL: myctlz_store16(
-define void @myctlz_store16(i16 %a, i16* %b) {
+define void @myctlz_store16(i16 %a, ptr %b) {
 ; CHECK: ld.param.
 ; CHECK-NEXT: cvt.u32.u16
 ; CHECK-NEXT: clz.b32
@@ -115,11 +115,11 @@ define void @myctlz_store16(i16 %a, i16* %b) {
 ; CHECK: st.{{[a-z]}}16
 ; CHECK: ret;
   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
-  store i16 %val, i16* %b
+  store i16 %val, ptr %b
   ret void
 }
 ; CHECK-LABEL: myctlz_store16_2(
-define void @myctlz_store16_2(i16 %a, i16* %b) {
+define void @myctlz_store16_2(i16 %a, ptr %b) {
 ; CHECK: ld.param.
 ; CHECK-NEXT: cvt.u32.u16
 ; CHECK-NEXT: clz.b32
@@ -128,6 +128,6 @@ define void @myctlz_store16_2(i16 %a, i16* %b) {
 ; CHECK: st.{{[a-z]}}16
 ; CHECK: ret;
   %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
-  store i16 %val, i16* %b
+  store i16 %val, ptr %b
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/disable-opt.ll b/llvm/test/CodeGen/NVPTX/disable-opt.ll
index 0b36120f760f5..18cc28a9560df 100644
--- a/llvm/test/CodeGen/NVPTX/disable-opt.ll
+++ b/llvm/test/CodeGen/NVPTX/disable-opt.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O0 | %ptxas-verify %}
 
-define void @foo(i32* %output) {
+define void @foo(ptr %output) {
 ; CHECK-LABEL: .visible .func foo(
 entry:
   %local = alloca i32
 ; CHECK: __local_depot
-  store i32 1, i32* %local
-  %0 = load i32, i32* %local
-  store i32 %0, i32* %output
+  store i32 1, ptr %local
+  %0 = load i32, ptr %local
+  store i32 %0, ptr %output
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/divrem-combine.ll b/llvm/test/CodeGen/NVPTX/divrem-combine.ll
index 856f20d95a170..fa712e57b8ea8 100644
--- a/llvm/test/CodeGen/NVPTX/divrem-combine.ll
+++ b/llvm/test/CodeGen/NVPTX/divrem-combine.ll
@@ -16,7 +16,7 @@
 ; during NVPTX isel, at -O2.  At -O0, we should leave it alone.
 
 ; CHECK-LABEL: sdiv32(
-define void @sdiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @sdiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
   %quot = sdiv i32 %n, %d
 
@@ -27,14 +27,14 @@ define void @sdiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
   %rem = srem i32 %n, %d
 
   ; O2: st{{.*}}[[quot]]
-  store i32 %quot, i32* %quot_ret
+  store i32 %quot, ptr %quot_ret
   ; O2: st{{.*}}[[rem]]
-  store i32 %rem, i32* %rem_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; CHECK-LABEL: udiv32(
-define void @udiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @udiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
   %quot = udiv i32 %n, %d
 
@@ -48,67 +48,67 @@ define void @udiv32(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
   %rem = urem i32 %n, %d
 
   ; O2: st{{.*}}[[quot]]
-  store i32 %quot, i32* %quot_ret
+  store i32 %quot, ptr %quot_ret
   ; O2: st{{.*}}[[rem]]
-  store i32 %rem, i32* %rem_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; Check that we don't perform this optimization if one operation is signed and
 ; the other isn't.
 ; CHECK-LABEL: mismatched_types1(
-define void @mismatched_types1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_types1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.u32
   ; CHECK: rem.s32
   %quot = udiv i32 %n, %d
   %rem = srem i32 %n, %d
-  store i32 %quot, i32* %quot_ret
-  store i32 %rem, i32* %rem_ret
+  store i32 %quot, ptr %quot_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; CHECK-LABEL: mismatched_types2(
-define void @mismatched_types2(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_types2(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.s32
   ; CHECK: rem.u32
   %quot = sdiv i32 %n, %d
   %rem = urem i32 %n, %d
-  store i32 %quot, i32* %quot_ret
-  store i32 %rem, i32* %rem_ret
+  store i32 %quot, ptr %quot_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; Check that we don't perform this optimization if the inputs to the div don't
 ; match the inputs to the rem.
 ; CHECK-LABEL: mismatched_inputs1(
-define void @mismatched_inputs1(i32 %n, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.s32
   ; CHECK: rem.s32
   %quot = sdiv i32 %n, %d
   %rem = srem i32 %d, %n
-  store i32 %quot, i32* %quot_ret
-  store i32 %rem, i32* %rem_ret
+  store i32 %quot, ptr %quot_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; CHECK-LABEL: mismatched_inputs2(
-define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.s32
   ; CHECK: rem.s32
   %quot = sdiv i32 %n1, %d
   %rem = srem i32 %n2, %d
-  store i32 %quot, i32* %quot_ret
-  store i32 %rem, i32* %rem_ret
+  store i32 %quot, ptr %quot_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }
 
 ; CHECK-LABEL: mismatched_inputs3(
-define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, i32* %quot_ret, i32* %rem_ret) {
+define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, ptr %quot_ret, ptr %rem_ret) {
   ; CHECK: div.s32
   ; CHECK: rem.s32
   %quot = sdiv i32 %n, %d1
   %rem = srem i32 %n, %d2
-  store i32 %quot, i32* %quot_ret
-  store i32 %rem, i32* %rem_ret
+  store i32 %quot, ptr %quot_ret
+  store i32 %rem, ptr %rem_ret
   ret void
 }

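For reference, the combine these tests guard replaces a div/rem pair
over identical operands with a single divide plus a multiply and a
subtract, using the identity n srem d == n - (n sdiv d) * d (sdiv
truncates toward zero, so the identity also holds for negative
operands). Roughly, in IR:

  %quot = sdiv i32 %n, %d
  %t    = mul i32 %quot, %d
  %rem  = sub i32 %n, %t        ; == srem i32 %n, %d

For example, n = 7, d = 3 gives %quot = 2 and %rem = 7 - 6 = 1.
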
diff  --git a/llvm/test/CodeGen/NVPTX/extloadv.ll b/llvm/test/CodeGen/NVPTX/extloadv.ll
index 8c00077a2b545..f0307c0d2b89d 100644
--- a/llvm/test/CodeGen/NVPTX/extloadv.ll
+++ b/llvm/test/CodeGen/NVPTX/extloadv.ll
@@ -1,16 +1,14 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify -arch=sm_35 %}
 
-define void @foo(float* nocapture readonly %x_value, double* nocapture %output) #0 {
-  %1 = bitcast float* %x_value to <4 x float>*
-  %2 = load <4 x float>, <4 x float>* %1, align 16
-  %3 = fpext <4 x float> %2 to <4 x double>
+define void @foo(ptr nocapture readonly %x_value, ptr nocapture %output) #0 {
+  %1 = load <4 x float>, ptr %x_value, align 16
+  %2 = fpext <4 x float> %1 to <4 x double>
 ; CHECK-NOT: ld.v2.f32 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}];
 ; CHECK:  cvt.f64.f32
 ; CHECK:  cvt.f64.f32
 ; CHECK:  cvt.f64.f32
 ; CHECK:  cvt.f64.f32
-  %4 = bitcast double* %output to <4 x double>*
-  store <4 x double> %3, <4 x double>* %4
+  store <4 x double> %2, ptr %output
   ret void
 }

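The pattern above generalizes: the loaded or stored type is named on the
memory instruction itself, so reinterpreting pointer bitcasts simply
disappear. A minimal sketch:

  define <4 x float> @load_as_v4f32(ptr %p) {
    ; No bitcast of %p is needed; the load names its result type.
    %v = load <4 x float>, ptr %p, align 16
    ret <4 x float> %v
  }
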
diff  --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index c64af05f1ebfe..ca432fe1715e1 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -209,8 +209,8 @@ define half @test_frem(half %a, half %b) #0 {
 ; CHECK-DAG:  ld.param.u64    %[[PTR:rd[0-9]+]], [test_store_param_1];
 ; CHECK-NEXT: st.b16          [%[[PTR]]], [[A]];
 ; CHECK-NEXT: ret;
-define void @test_store(half %a, half* %b) #0 {
-  store half %a, half* %b
+define void @test_store(half %a, ptr %b) #0 {
+  store half %a, ptr %b
   ret void
 }
 
@@ -219,8 +219,8 @@ define void @test_store(half %a, half* %b) #0 {
 ; CHECK-NEXT: ld.b16          [[R:%h[0-9]+]], [%[[PTR]]];
 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
 ; CHECK-NEXT: ret;
-define half @test_load(half* %a) #0 {
-  %r = load half, half* %a
+define half @test_load(ptr %a) #0 {
+  %r = load half, ptr %a
   ret half %r
 }
 
@@ -232,9 +232,9 @@ define half @test_load(half* %a) #0 {
 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
 ; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
 ; CHECK: ret
-define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
-  %1 = load half, half * %from , align 1
-  store half %1, half * %to , align 1
+define void @test_halfp0a1(ptr noalias readonly %from, ptr %to) {
+  %1 = load half, ptr %from , align 1
+  store half %1, ptr %to , align 1
   ret void
 }
 
@@ -608,14 +608,14 @@ define i1 @test_fcmp_ord(half %a, half %b) #0 {
 ; CHECK:      [[LABEL]]:
 ; CHECK:      st.u32  [%[[D]]],
 ; CHECK:      ret;
-define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
+define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 {
   %c = fcmp uge half %a, %b
   br i1 %c, label %then, label %else
 then:
-  store i32 0, i32* %p1
+  store i32 0, ptr %p1
   ret void
 else:
-  store i32 0, i32* %p2
+  store i32 0, ptr %p2
   ret void
 }
 
@@ -634,19 +634,19 @@ else:
 ; CHECK:      @[[PRED]] bra   [[LOOP]];
 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
 ; CHECK:      ret;
-define half @test_phi(half* %p1) #0 {
+define half @test_phi(ptr %p1) #0 {
 entry:
-  %a = load half, half* %p1
+  %a = load half, ptr %p1
   br label %loop
 loop:
   %r = phi half [%a, %entry], [%b, %loop]
-  %b = load half, half* %p1
-  %c = call i1 @test_dummy(half* %p1)
+  %b = load half, ptr %p1
+  %c = call i1 @test_dummy(ptr %p1)
   br i1 %c, label %loop, label %return
 return:
   ret half %r
 }
-declare i1 @test_dummy(half* %p1) #0
+declare i1 @test_dummy(ptr %p1) #0
 
 ; CHECK-LABEL: test_fptosi_i32(
 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptosi_i32_param_0];

diff  --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index f45093ea74d33..c83e370af1fbf 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -279,9 +279,9 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK:        mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]];
 ; CHECK-DAG:    st.v2.b16       [%[[B]]], {[[E0]], [[E1]]};
 ; CHECK:        ret;
-define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
-  %t1 = load <2 x half>, <2 x half>* %a
-  store <2 x half> %t1, <2 x half>* %b, align 16
+define void @test_ldst_v2f16(ptr %a, ptr %b) {
+  %t1 = load <2 x half>, ptr %a
+  store <2 x half> %t1, ptr %b, align 16
   ret void
 }
 
@@ -296,9 +296,9 @@ define void @test_ldst_v2f16(<2 x half>* %a, <2 x half>* %b) {
 ; CHECK-DAG:    st.u32          [%[[B]]],
 ; CHECK-DAG:    st.b16          [%[[B]]+4],
 ; CHECK:        ret;
-define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
-  %t1 = load <3 x half>, <3 x half>* %a
-  store <3 x half> %t1, <3 x half>* %b, align 16
+define void @test_ldst_v3f16(ptr %a, ptr %b) {
+  %t1 = load <3 x half>, ptr %a
+  store <3 x half> %t1, ptr %b, align 16
   ret void
 }
 
@@ -308,9 +308,9 @@ define void @test_ldst_v3f16(<3 x half>* %a, <3 x half>* %b) {
 ; CHECK-DAG:    ld.v4.b16       {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]];
 ; CHECK-DAG:    st.v4.b16       [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK:        ret;
-define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
-  %t1 = load <4 x half>, <4 x half>* %a
-  store <4 x half> %t1, <4 x half>* %b, align 16
+define void @test_ldst_v4f16(ptr %a, ptr %b) {
+  %t1 = load <4 x half>, ptr %a
+  store <4 x half> %t1, ptr %b, align 16
   ret void
 }
 
@@ -320,9 +320,9 @@ define void @test_ldst_v4f16(<4 x half>* %a, <4 x half>* %b) {
 ; CHECK-DAG:    ld.v4.b32       {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [%[[A]]];
 ; CHECK-DAG:    st.v4.b32       [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK:        ret;
-define void @test_ldst_v8f16(<8 x half>* %a, <8 x half>* %b) {
-  %t1 = load <8 x half>, <8 x half>* %a
-  store <8 x half> %t1, <8 x half>* %b, align 16
+define void @test_ldst_v8f16(ptr %a, ptr %b) {
+  %t1 = load <8 x half>, ptr %a
+  store <8 x half> %t1, ptr %b, align 16
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/fp16.ll b/llvm/test/CodeGen/NVPTX/fp16.ll
index fc16755c93803..207972c9275fa 100644
--- a/llvm/test/CodeGen/NVPTX/fp16.ll
+++ b/llvm/test/CodeGen/NVPTX/fp16.ll
@@ -8,39 +8,39 @@ declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
 
 ; CHECK-LABEL: @test_convert_fp16_to_fp32
 ; CHECK: cvt.f32.f16
-define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16, i16 addrspace(1)* %in, align 2
+define void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+  %val = load i16, ptr addrspace(1) %in, align 2
   %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
-  store float %cvt, float addrspace(1)* %out, align 4
+  store float %cvt, ptr addrspace(1) %out, align 4
   ret void
 }
 
 
 ; CHECK-LABEL: @test_convert_fp16_to_fp64
 ; CHECK: cvt.f64.f16
-define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16, i16 addrspace(1)* %in, align 2
+define void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+  %val = load i16, ptr addrspace(1) %in, align 2
   %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
-  store double %cvt, double addrspace(1)* %out, align 4
+  store double %cvt, ptr addrspace(1) %out, align 4
   ret void
 }
 
 
 ; CHECK-LABEL: @test_convert_fp32_to_fp16
 ; CHECK: cvt.rn.f16.f32
-define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float, float addrspace(1)* %in, align 2
+define void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+  %val = load float, ptr addrspace(1) %in, align 2
   %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
-  store i16 %cvt, i16 addrspace(1)* %out, align 4
+  store i16 %cvt, ptr addrspace(1) %out, align 4
   ret void
 }
 
 
 ; CHECK-LABEL: @test_convert_fp64_to_fp16
 ; CHECK: cvt.rn.f16.f64
-define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
-  %val = load double, double addrspace(1)* %in, align 2
+define void @test_convert_fp64_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
+  %val = load double, ptr addrspace(1) %in, align 2
   %cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
-  store i16 %cvt, i16 addrspace(1)* %out, align 4
+  store i16 %cvt, ptr addrspace(1) %out, align 4
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
index d75b02d97fe7b..51344b474d29e 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
@@ -18,11 +18,11 @@ define void @func() !dbg !8 {
 ;CHECK-SAME: !dbg [[FUNCNODE:![0-9]+]]
 entry:
 ; References to the variables must be converted back to generic address space.
-; CHECK-DAG: addrspacecast ([4 x i8] addrspace(1)* @.str to [4 x i8]*)
-  %0 = load i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), align 1
+; CHECK-DAG: addrspacecast (ptr addrspace(1) @.str to ptr)
+  %0 = load i8, ptr @.str, align 1
   call void @extfunc(i8 signext %0)
-; CHECK-DAG: addrspacecast (i8 addrspace(1)* @static_var to i8*)
-  %1 = load i8, i8* @static_var, align 1
+; CHECK-DAG: addrspacecast (ptr addrspace(1) @static_var to ptr)
+  %1 = load i8, ptr @static_var, align 1
   call void @extfunc(i8 signext %1)
   ret void
 ; CHECK: ret void

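For context, GenericToNVVM keeps module-scope variables in the global
address space and rewrites generic-space references through constant
addrspacecasts, which under opaque pointers read as in this sketch
(hypothetical variable name):

  @examplevar = addrspace(1) global i8 0
  define i8 @use_generic() {
    %v = load i8, ptr addrspacecast (ptr addrspace(1) @examplevar to ptr)
    ret i8 %v
  }
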
diff  --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
index 0ebec35922e43..1d5e438c1b727 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -12,19 +12,19 @@ target triple = "nvptx-nvidia-cuda"
 @myconst = internal constant i32 420, align 4
 
 
-define void @foo(i32* %a, i32* %b) {
+define void @foo(ptr %a, ptr %b) {
 ; Expect one load -- @myconst isn't loaded from, because we know its value
 ; statically.
 ; CHECK: ld.global.u32
 ; CHECK: st.global.u32
 ; CHECK: st.global.u32
-  %ld1 = load i32, i32* @myglobal
-  %ld2 = load i32, i32* @myconst
-  store i32 %ld1, i32* %a
-  store i32 %ld2, i32* %b
+  %ld1 = load i32, ptr @myglobal
+  %ld2 = load i32, ptr @myconst
+  store i32 %ld1, ptr %a
+  store i32 %ld2, ptr %b
   ret void
 }
 
 
 !nvvm.annotations = !{!0}
-!0 = !{void (i32*, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}

diff  --git a/llvm/test/CodeGen/NVPTX/global-ctor-empty.ll b/llvm/test/CodeGen/NVPTX/global-ctor-empty.ll
index 10ca0168b30c7..77eb8714b5a9d 100644
--- a/llvm/test/CodeGen/NVPTX/global-ctor-empty.ll
+++ b/llvm/test/CodeGen/NVPTX/global-ctor-empty.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 2>&1
 
 ; Check that llc doesn't die when given an empty global ctor / dtor.
-@llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] []
-@llvm.global_dtors = appending global [0 x { i32, void ()*, i8* }] []
+@llvm.global_ctors = appending global [0 x { i32, ptr, ptr }] []
+@llvm.global_dtors = appending global [0 x { i32, ptr, ptr }] []

diff  --git a/llvm/test/CodeGen/NVPTX/global-ctor.ll b/llvm/test/CodeGen/NVPTX/global-ctor.ll
index b7206dce66129..43d9be01423d0 100644
--- a/llvm/test/CodeGen/NVPTX/global-ctor.ll
+++ b/llvm/test/CodeGen/NVPTX/global-ctor.ll
@@ -1,7 +1,7 @@
 ; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
 
 ; Check that llc dies when given a nonempty global ctor.
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
 
 ; CHECK: ERROR: Module has a nontrivial global ctor
 define internal void @foo() {

diff  --git a/llvm/test/CodeGen/NVPTX/global-dtor.ll b/llvm/test/CodeGen/NVPTX/global-dtor.ll
index 6125b65c40916..4bdcecbabf64f 100644
--- a/llvm/test/CodeGen/NVPTX/global-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/global-dtor.ll
@@ -1,7 +1,7 @@
 ; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
 
 ; Check that llc dies when given a nonempty global dtor.
-@llvm.global_dtors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
 
 ; CHECK: ERROR: Module has a nontrivial global dtor
 define internal void @foo() {

diff  --git a/llvm/test/CodeGen/NVPTX/global-ordering.ll b/llvm/test/CodeGen/NVPTX/global-ordering.ll
index daea215ebd879..aaa34bb4578e5 100644
--- a/llvm/test/CodeGen/NVPTX/global-ordering.ll
+++ b/llvm/test/CodeGen/NVPTX/global-ordering.ll
@@ -10,7 +10,7 @@
 ; PTX32-NEXT: .visible .global .align 4 .u32 a2 = a;
 ; PTX64:      .visible .global .align 1 .u8 a = 2;
 ; PTX64-NEXT: .visible .global .align 8 .u64 a2 = a;
-@a2 = addrspace(1) global i8 addrspace(1)* @a
+@a2 = addrspace(1) global ptr addrspace(1) @a
 @a = addrspace(1) global i8 2
 
 
@@ -18,5 +18,5 @@
 ; PTX32-NEXT: .visible .global .align 4 .u32 b2[2] = {b, b};
 ; PTX64:      .visible .global .align 1 .u8 b = 1;
 ; PTX64-NEXT: .visible .global .align 8 .u64 b2[2] = {b, b};
-@b2 = addrspace(1) global [2 x i8 addrspace(1)*] [i8 addrspace(1)* @b, i8 addrspace(1)* @b]
+@b2 = addrspace(1) global [2 x ptr addrspace(1)] [ptr addrspace(1) @b, ptr addrspace(1) @b]
 @b = addrspace(1) global i8 1

diff  --git a/llvm/test/CodeGen/NVPTX/globals_lowering.ll b/llvm/test/CodeGen/NVPTX/globals_lowering.ll
index 07fa9cabbfaf8..a03fea4fa8781 100644
--- a/llvm/test/CodeGen/NVPTX/globals_lowering.ll
+++ b/llvm/test/CodeGen/NVPTX/globals_lowering.ll
@@ -8,9 +8,9 @@
 define void @foo(float %f) {
 entry:
   ; CHK: ld.shared.f32  %{{[a-zA-Z0-9]+}}, [Gbl+8];
-  %0 = load float, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+  %0 = load float, ptr addrspace(3) getelementptr inbounds ([1024 x %MyStruct], ptr addrspace(3) @Gbl, i32 0, i32 0, i32 2)
   %add = fadd float %0, %f
   ; CHK: st.shared.f32   [Gbl+8], %{{[a-zA-Z0-9]+}};
-  store float %add, float addrspace(3)* getelementptr inbounds ([1024 x %MyStruct], [1024 x %MyStruct] addrspace(3)* @Gbl, i32 0, i32 0, i32 2)
+  store float %add, ptr addrspace(3) getelementptr inbounds ([1024 x %MyStruct], ptr addrspace(3) @Gbl, i32 0, i32 0, i32 2)
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll
index 33764718476f5..d9a3b95ef341e 100644
--- a/llvm/test/CodeGen/NVPTX/half.ll
+++ b/llvm/test/CodeGen/NVPTX/half.ll
@@ -5,71 +5,71 @@
 @"half_array" = addrspace(1) constant [4 x half]
                 [half 0xH0201, half 0xH0403, half 0xH0605, half 0xH0807]
 
-define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_load_store
 ; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load half, half addrspace(1)* %in
-  store half %val, half addrspace(1) * %out
+  %val = load half, ptr addrspace(1) %in
+  store half %val, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
+define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_bitcast_from_half
 ; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load half, half addrspace(1) * %in
+  %val = load half, ptr addrspace(1) %in
   %val_int = bitcast half %val to i16
-  store i16 %val_int, i16 addrspace(1)* %out
+  store i16 %val_int, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
+define void @test_bitcast_to_half(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; CHECK-LABEL: @test_bitcast_to_half
 ; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load i16, i16 addrspace(1)* %in
+  %val = load i16, ptr addrspace(1) %in
   %val_fp = bitcast i16 %val to half
-  store half %val_fp, half addrspace(1)* %out
+  store half %val_fp, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
+define void @test_extend32(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_extend32
 ; CHECK: cvt.f32.f16
 
-  %val16 = load half, half addrspace(1)* %in
+  %val16 = load half, ptr addrspace(1) %in
   %val32 = fpext half %val16 to float
-  store float %val32, float addrspace(1)* %out
+  store float %val32, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
+define void @test_extend64(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_extend64
 ; CHECK: cvt.f64.f16
 
-  %val16 = load half, half addrspace(1)* %in
+  %val16 = load half, ptr addrspace(1) %in
   %val64 = fpext half %val16 to double
-  store double %val64, double addrspace(1)* %out
+  store double %val64, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_trunc32(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: test_trunc32
 ; CHECK: cvt.rn.f16.f32
 
-  %val32 = load float, float addrspace(1)* %in
+  %val32 = load float, ptr addrspace(1) %in
   %val16 = fptrunc float %val32 to half
-  store half %val16, half addrspace(1)* %out
+  store half %val16, ptr addrspace(1) %out
   ret void
 }
 
-define void @test_trunc64(double addrspace(1)* %in, half addrspace(1)* %out) {
+define void @test_trunc64(ptr addrspace(1) %in, ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_trunc64
 ; CHECK: cvt.rn.f16.f64
 
-  %val32 = load double, double addrspace(1)* %in
+  %val32 = load double, ptr addrspace(1) %in
   %val16 = fptrunc double %val32 to half
-  store half %val16, half addrspace(1)* %out
+  store half %val16, ptr addrspace(1) %out
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/i1-global.ll b/llvm/test/CodeGen/NVPTX/i1-global.ll
index 2f7ec7675b2b8..3a4d87891328a 100644
--- a/llvm/test/CodeGen/NVPTX/i1-global.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-global.ll
@@ -8,13 +8,13 @@ target triple = "nvptx-nvidia-cuda"
 @mypred = addrspace(1) global i1 true, align 1
 
 
-define void @foo(i1 %p, i32* %out) {
-  %ld = load i1, i1 addrspace(1)* @mypred
+define void @foo(i1 %p, ptr %out) {
+  %ld = load i1, ptr addrspace(1) @mypred
   %val = zext i1 %ld to i32
-  store i32 %val, i32* %out
+  store i32 %val, ptr %out
   ret void
 }
 
 
 !nvvm.annotations = !{!0}
-!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}

diff  --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll
index a759d27f1b1f2..c64c4d35779d7 100644
--- a/llvm/test/CodeGen/NVPTX/i1-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-param.ll
@@ -9,12 +9,12 @@ target triple = "nvptx-nvidia-cuda"
 ; CHECK: .entry foo
 ; CHECK:   .param .u8 foo_param_0
 ; CHECK:   .param .u32 foo_param_1
-define void @foo(i1 %p, i32* %out) {
+define void @foo(i1 %p, ptr %out) {
   %val = zext i1 %p to i32
-  store i32 %val, i32* %out
+  store i32 %val, ptr %out
   ret void
 }
 
 
 !nvvm.annotations = !{!0}
-!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}

diff  --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll
index 6a514e74bd2fd..52178e65c5ee5 100644
--- a/llvm/test/CodeGen/NVPTX/i128-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-param.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: .visible .func callee(
 ; CHECK-NEXT: .param .align 16 .b8 callee_param_0[16],
 ; CHECK-NEXT: .param .align 16 .b8 callee_param_1[16],
-define void @callee(i128, i128, i128*) {
+define void @callee(i128, i128, ptr) {
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [callee_param_0];
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [callee_param_1];
 
@@ -16,14 +16,14 @@ define void @callee(i128, i128, i128*) {
 	; CHECK-NEXT: mul.lo.s64 %[[REG9:rd[0-9]+]], %[[REG0]], %[[REG2]];
   %a = mul i128 %0, %1
 
-  store i128 %a, i128* %2
+  store i128 %a, ptr %2
   ret void
 }
 
 ; CHECK-LABEL: .visible .entry caller_kernel(
 ; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_0[16],
 ; CHECK-NEXT: .param .align 16 .b8 caller_kernel_param_1[16],
-define ptx_kernel void @caller_kernel(i128, i128, i128*) {
+define ptx_kernel void @caller_kernel(i128, i128, ptr) {
 start:
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_kernel_param_0];
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
@@ -34,7 +34,7 @@ start:
 	; CHECK:      .param .align 16 .b8 param1[16];
 	; CHECK-NEXT: st.param.v2.b64 	[param1+0], {%[[REG2]], %[[REG3]]}
 	; CHECK:      } // callseq [[CALLSEQ_ID]]
-  call void @callee(i128 %0, i128 %1, i128* %2)
+  call void @callee(i128 %0, i128 %1, ptr %2)
 
   ret void
 }
@@ -42,7 +42,7 @@ start:
 ; CHECK-LABEL: .visible .func caller_func(
 ; CHECK-NEXT: .param .align 16 .b8 caller_func_param_0[16],
 ; CHECK-NEXT: .param .align 16 .b8 caller_func_param_1[16],
-define void @caller_func(i128, i128, i128*) {
+define void @caller_func(i128, i128, ptr) {
 start:
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_func_param_0]
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
@@ -53,7 +53,7 @@ start:
 	; CHECK: .param .align 16 .b8 param1[16];
   ; CHECK: st.param.v2.b64 	[param1+0], {%[[REG2]], %[[REG3]]}
 	; CHECK: } // callseq [[CALLSEQ_ID]]
-  call void @callee(i128 %0, i128 %1, i128* %2)
+  call void @callee(i128 %0, i128 %1, ptr %2)
 
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/i128-retval.ll b/llvm/test/CodeGen/NVPTX/i128-retval.ll
index 9a7fd8cc138b4..df173536c297f 100644
--- a/llvm/test/CodeGen/NVPTX/i128-retval.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-retval.ll
@@ -9,7 +9,7 @@ define i128 @callee(i128) {
 }
 
 ; CHECK-LABEL: .visible .func caller(
-define void @caller(i128, i128*) {
+define void @caller(i128, ptr) {
 start:
   ; CHECK-DAG: ld.param.v2.u64 {%[[REG0:rd[0-9]+]], %[[REG1:rd[0-9]+]]}, [caller_param_0];
   ; CHECK-DAG: ld.param.u64 %[[OUT:rd[0-9]+]],  [caller_param_1];
@@ -23,7 +23,7 @@ start:
 
 	; CHECK-DAG: st.u64 [%[[OUT]]], %[[REG2]];
 	; CHECK-DAG: st.u64 [%[[OUT]]+8], %[[REG3]];
-  store i128 %a, i128* %1
+  store i128 %a, ptr %1
 
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/i8-param.ll b/llvm/test/CodeGen/NVPTX/i8-param.ll
index 18d4a71b16607..d8ca8fe28079d 100644
--- a/llvm/test/CodeGen/NVPTX/i8-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i8-param.ll
@@ -12,12 +12,12 @@ define i8 @callee(i8 %a) {
 }
 
 ; CHECK: .visible .func caller
-define void @caller(i8* %a) {
+define void @caller(ptr %a) {
 ; CHECK: ld.u8
-  %val = load i8, i8* %a
+  %val = load i8, ptr %a
   %ret = tail call i8 @callee(i8 %val)
 ; CHECK: ld.param.b32
-  store i8 %ret, i8* %a
+  store i8 %ret, ptr %a
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll b/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll
index c054fe86cfc58..04c180d4d5c99 100644
--- a/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll
+++ b/llvm/test/CodeGen/NVPTX/inlineasm-output-template.ll
@@ -16,7 +16,7 @@ define dso_local i32 @test_inlineasm_c_output_template0() {
 ; check: //TEST baz
 ;@baz = internal global i32 0, align 4
 ;define dso_local i32 @test_inlineasm_c_output_template1() {
-;  tail call void asm sideeffect "//TEST ${0:c}", "i"(i32* nonnull @baz)
+;  tail call void asm sideeffect "//TEST ${0:c}", "i"(ptr nonnull @baz)
 ;  ret i32 42
 ;}
 

diff  --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
index 2dbd29f616f88..040bbde13800c 100644
--- a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
+++ b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll
@@ -4,18 +4,18 @@
 ; optimizations (such as the store below being eliminated as dead code). This
 ; test makes sure we don't regress.
 
-declare void @foo(i32 addrspace(1)*)
+declare void @foo(ptr addrspace(1))
 
-declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
+declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
 
 ; CHECK: @bar
 define void @bar() {
   %t1 = alloca i32
-; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* nonnull %t1)
-; CHECK-NEXT: store i32 10, i32* %t1
-  %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1)
-  store i32 10, i32* %t1
-  call void @foo(i32 addrspace(1)* %t2)
+; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1)
+; CHECK-NEXT: store i32 10, ptr %t1
+  %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1)
+  store i32 10, ptr %t1
+  call void @foo(ptr addrspace(1) %t2)
   ret void
 }
 

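Note the change in the intrinsic suffix: with opaque pointers, pointee
types no longer participate in intrinsic name mangling; only the address
spaces do. Hence:

  ; typed pointers:  @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*)
  ; opaque pointers: @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
  declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
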
diff  --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 100465d3fdba0..c6061d3502e91 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -75,12 +75,12 @@ define i32 @test_popc64_trunc(i64 %a) {
 ; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
 ; then converting back to i16.
 ; CHECK-LABEL: test_popc16
-define void @test_popc16(i16 %a, i16* %b) {
+define void @test_popc16(i16 %a, ptr %b) {
 ; CHECK: cvt.u32.u16
 ; CHECK: popc.b32
 ; CHECK: cvt.u16.u32
   %val = call i16 @llvm.ctpop.i16(i16 %a)
-  store i16 %val, i16* %b
+  store i16 %val, ptr %b
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/isspacep.ll b/llvm/test/CodeGen/NVPTX/isspacep.ll
index fe3741012ba30..8ac199aa4a0b7 100644
--- a/llvm/test/CodeGen/NVPTX/isspacep.ll
+++ b/llvm/test/CodeGen/NVPTX/isspacep.ll
@@ -1,36 +1,36 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 
-declare i1 @llvm.nvvm.isspacep.const(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.global(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.local(i8*) readnone noinline
-declare i1 @llvm.nvvm.isspacep.shared(i8*) readnone noinline
+declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
+declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
 
 ; CHECK: is_const
-define i1 @is_const(i8* %addr) {
+define i1 @is_const(ptr %addr) {
 ; CHECK: isspacep.const
-  %v = tail call i1 @llvm.nvvm.isspacep.const(i8* %addr)
+  %v = tail call i1 @llvm.nvvm.isspacep.const(ptr %addr)
   ret i1 %v
 }
 
 ; CHECK: is_global
-define i1 @is_global(i8* %addr) {
+define i1 @is_global(ptr %addr) {
 ; CHECK: isspacep.global
-  %v = tail call i1 @llvm.nvvm.isspacep.global(i8* %addr)
+  %v = tail call i1 @llvm.nvvm.isspacep.global(ptr %addr)
   ret i1 %v
 }
 
 ; CHECK: is_local
-define i1 @is_local(i8* %addr) {
+define i1 @is_local(ptr %addr) {
 ; CHECK: isspacep.local
-  %v = tail call i1 @llvm.nvvm.isspacep.local(i8* %addr)
+  %v = tail call i1 @llvm.nvvm.isspacep.local(ptr %addr)
   ret i1 %v
 }
 
 ; CHECK: is_shared
-define i1 @is_shared(i8* %addr) {
+define i1 @is_shared(ptr %addr) {
 ; CHECK: isspacep.shared
-  %v = tail call i1 @llvm.nvvm.isspacep.shared(i8* %addr)
+  %v = tail call i1 @llvm.nvvm.isspacep.shared(ptr %addr)
   ret i1 %v
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/ld-generic.ll b/llvm/test/CodeGen/NVPTX/ld-generic.ll
index 472f3e88efee6..087de8c63709f 100644
--- a/llvm/test/CodeGen/NVPTX/ld-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-generic.ll
@@ -5,61 +5,61 @@
 
 
 ;; i8
-define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
+define i8 @ld_global_i8(ptr addrspace(0) %ptr) {
 ; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i8, i8 addrspace(0)* %ptr
+  %a = load i8, ptr addrspace(0) %ptr
   ret i8 %a
 }
 
 ;; i16
-define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
+define i16 @ld_global_i16(ptr addrspace(0) %ptr) {
 ; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i16, i16 addrspace(0)* %ptr
+  %a = load i16, ptr addrspace(0) %ptr
   ret i16 %a
 }
 
 ;; i32
-define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
+define i32 @ld_global_i32(ptr addrspace(0) %ptr) {
 ; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i32, i32 addrspace(0)* %ptr
+  %a = load i32, ptr addrspace(0) %ptr
   ret i32 %a
 }
 
 ;; i64
-define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
+define i64 @ld_global_i64(ptr addrspace(0) %ptr) {
 ; PTX32: ld.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i64, i64 addrspace(0)* %ptr
+  %a = load i64, ptr addrspace(0) %ptr
   ret i64 %a
 }
 
 ;; f32
-define float @ld_global_f32(float addrspace(0)* %ptr) {
+define float @ld_global_f32(ptr addrspace(0) %ptr) {
 ; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load float, float addrspace(0)* %ptr
+  %a = load float, ptr addrspace(0) %ptr
   ret float %a
 }
 
 ;; f64
-define double @ld_global_f64(double addrspace(0)* %ptr) {
+define double @ld_global_f64(ptr addrspace(0) %ptr) {
 ; PTX32: ld.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: ret
 ; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load double, double addrspace(0)* %ptr
+  %a = load double, ptr addrspace(0) %ptr
   ret double %a
 }

diff  --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
index 0ef9317f4d0cb..b46232880fc52 100644
--- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
+++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll
@@ -5,18 +5,18 @@
 ; ld.global.nc.
 
 ; CHECK-LABEL: @ld_global
-define i32 @ld_global(i32 addrspace(1)* %ptr) {
+define i32 @ld_global(ptr addrspace(1) %ptr) {
 ; CHECK: ld.global.nc.{{[a-z]}}32
-  %a = load i32, i32 addrspace(1)* %ptr, !invariant.load !0
+  %a = load i32, ptr addrspace(1) %ptr, !invariant.load !0
   ret i32 %a
 }
 
 ; CHECK-LABEL: @ld_global_v2f16
-define half @ld_global_v2f16(<2 x half> addrspace(1)* %ptr) {
+define half @ld_global_v2f16(ptr addrspace(1) %ptr) {
 ; Load of v2f16 is weird. We consider it to be a legal type, which happens to be
 ; loaded/stored as a 32-bit scalar.
 ; CHECK: ld.global.nc.b32
-  %a = load <2 x half>, <2 x half> addrspace(1)* %ptr, !invariant.load !0
+  %a = load <2 x half>, ptr addrspace(1) %ptr, !invariant.load !0
   %v1 = extractelement <2 x half> %a, i32 0
   %v2 = extractelement <2 x half> %a, i32 1
   %sum = fadd half %v1, %v2
@@ -24,12 +24,12 @@ define half @ld_global_v2f16(<2 x half> addrspace(1)* %ptr) {
 }
 
 ; CHECK-LABEL: @ld_global_v4f16
-define half @ld_global_v4f16(<4 x half> addrspace(1)* %ptr) {
+define half @ld_global_v4f16(ptr addrspace(1) %ptr) {
 ; Larger f16 vectors may be split into individual f16 elements and multiple
 ; loads/stores may be vectorized using f16 element type. Practically it's
 ; limited to v4 variant only.
 ; CHECK: ld.global.nc.v4.b16
-  %a = load <4 x half>, <4 x half> addrspace(1)* %ptr, !invariant.load !0
+  %a = load <4 x half>, ptr addrspace(1) %ptr, !invariant.load !0
   %v1 = extractelement <4 x half> %a, i32 0
   %v2 = extractelement <4 x half> %a, i32 1
   %v3 = extractelement <4 x half> %a, i32 2
@@ -41,11 +41,11 @@ define half @ld_global_v4f16(<4 x half> addrspace(1)* %ptr) {
 }
 
 ; CHECK-LABEL: @ld_global_v8f16
-define half @ld_global_v8f16(<8 x half> addrspace(1)* %ptr) {
+define half @ld_global_v8f16(ptr addrspace(1) %ptr) {
 ; Larger vectors are, again, loaded as v4i32. PTX has no v8 variants of loads/stores,
 ; so load/store vectorizer has to convert v8f16 -> v4 x v2f16.
 ; CHECK: ld.global.nc.v4.b32
-  %a = load <8 x half>, <8 x half> addrspace(1)* %ptr, !invariant.load !0
+  %a = load <8 x half>, ptr addrspace(1) %ptr, !invariant.load !0
   %v1 = extractelement <8 x half> %a, i32 0
   %v2 = extractelement <8 x half> %a, i32 2
   %v3 = extractelement <8 x half> %a, i32 4
@@ -57,9 +57,9 @@ define half @ld_global_v8f16(<8 x half> addrspace(1)* %ptr) {
 }
 
 ; CHECK-LABEL: @ld_global_v2i32
-define i32 @ld_global_v2i32(<2 x i32> addrspace(1)* %ptr) {
+define i32 @ld_global_v2i32(ptr addrspace(1) %ptr) {
 ; CHECK: ld.global.nc.v2.{{[a-z]}}32
-  %a = load <2 x i32>, <2 x i32> addrspace(1)* %ptr, !invariant.load !0
+  %a = load <2 x i32>, ptr addrspace(1) %ptr, !invariant.load !0
   %v1 = extractelement <2 x i32> %a, i32 0
   %v2 = extractelement <2 x i32> %a, i32 1
   %sum = add i32 %v1, %v2
@@ -67,9 +67,9 @@ define i32 @ld_global_v2i32(<2 x i32> addrspace(1)* %ptr) {
 }
 
 ; CHECK-LABEL: @ld_global_v4i32
-define i32 @ld_global_v4i32(<4 x i32> addrspace(1)* %ptr) {
+define i32 @ld_global_v4i32(ptr addrspace(1) %ptr) {
 ; CHECK: ld.global.nc.v4.{{[a-z]}}32
-  %a = load <4 x i32>, <4 x i32> addrspace(1)* %ptr, !invariant.load !0
+  %a = load <4 x i32>, ptr addrspace(1) %ptr, !invariant.load !0
   %v1 = extractelement <4 x i32> %a, i32 0
   %v2 = extractelement <4 x i32> %a, i32 1
   %v3 = extractelement <4 x i32> %a, i32 2
@@ -81,16 +81,16 @@ define i32 @ld_global_v4i32(<4 x i32> addrspace(1)* %ptr) {
 }
 
 ; CHECK-LABEL: @ld_not_invariant
-define i32 @ld_not_invariant(i32 addrspace(1)* %ptr) {
+define i32 @ld_not_invariant(ptr addrspace(1) %ptr) {
 ; CHECK: ld.global.{{[a-z]}}32
-  %a = load i32, i32 addrspace(1)* %ptr
+  %a = load i32, ptr addrspace(1) %ptr
   ret i32 %a
 }
 
 ; CHECK-LABEL: @ld_not_global_addrspace
-define i32 @ld_not_global_addrspace(i32 addrspace(0)* %ptr) {
+define i32 @ld_not_global_addrspace(ptr addrspace(0) %ptr) {
 ; CHECK: ld.{{[a-z]}}32
-  %a = load i32, i32 addrspace(0)* %ptr
+  %a = load i32, ptr addrspace(0) %ptr
   ret i32 %a
 }
 

diff  --git a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
index e9038412de380..55031e3cf96eb 100644
--- a/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
+++ b/llvm/test/CodeGen/NVPTX/ldparam-v4.ll
@@ -4,11 +4,11 @@
 declare <4 x float> @bar()
 
 ; CHECK-LABEL: .func foo(
-define void @foo(<4 x float>* %ptr) {
+define void @foo(ptr %ptr) {
 ; CHECK:     ld.param.u32 %[[PTR:r[0-9]+]], [foo_param_0];
 ; CHECK:     ld.param.v4.f32 {[[E0:%f[0-9]+]], [[E1:%f[0-9]+]], [[E2:%f[0-9]+]], [[E3:%f[0-9]+]]}, [retval0+0];
 ; CHECK:     st.v4.f32    [%[[PTR]]], {[[E0]], [[E1]], [[E2]], [[E3]]}
   %val = tail call <4 x float> @bar()
-  store <4 x float> %val, <4 x float>* %ptr
+  store <4 x float> %val, ptr %ptr
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/ldu-i8.ll b/llvm/test/CodeGen/NVPTX/ldu-i8.ll
index 88c10504036ba..23299e871ccc4 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-i8.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-i8.ll
@@ -3,13 +3,13 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr, i32)
 
-define i8 @foo(i8* %a) {
+define i8 @foo(ptr %a) {
 ; Ensure we properly truncate off the high-order 24 bits
 ; CHECK:        ldu.global.u8
 ; CHECK:        cvt.u32.u16
 ; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4)
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr %a, i32 4)
   ret i8 %val
 }

diff  --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
index 2cdbd3dd32a4a..e6c5372755f57 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -2,36 +2,36 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_32 | %ptxas-verify %if !ptxas-11.0 %{-arch=sm_32%} %}
 
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
-declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
-declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
 
 
 ; CHECK: func0
-define i8 @func0(i8 addrspace(1)* %ptr) {
+define i8 @func0(ptr addrspace(1) %ptr) {
 ; ldu.global.u8
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func1
-define i32 @func1(i32 addrspace(1)* %ptr) {
+define i32 @func1(ptr addrspace(1) %ptr) {
 ; ldu.global.u32
-  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: func2
-define i8 @func2(i8 addrspace(1)* %ptr) {
+define i8 @func2(ptr addrspace(1) %ptr) {
 ; ld.global.nc.u8
-  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func3
-define i32 @func3(i32 addrspace(1)* %ptr) {
+define i32 @func3(ptr addrspace(1) %ptr) {
 ; ld.global.nc.u32
-  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
   ret i32 %val
 }

diff  --git a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index dd5d4d5702d6e..44c82fcb1a14c 100644
--- a/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/llvm/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -4,17 +4,17 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
 
-define void @reg_plus_offset(i32* %a) {
+define void @reg_plus_offset(ptr %a) {
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
-  %p2 = getelementptr i32, i32* %a, i32 8
-  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
-  %p3 = getelementptr i32, i32* %a, i32 9
-  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
+  %p2 = getelementptr i32, ptr %a, i32 8
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p2, i32 4)
+  %p3 = getelementptr i32, ptr %a, i32 9
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p3, i32 4)
   %t3 = mul i32 %t1, %t2
-  store i32 %t3, i32* %a
+  store i32 %t3, ptr %a
   ret void
 }
 
-declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr, i32)
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()

diff  --git a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
index 1847b5d774fa6..d7a9765c8e83d 100644
--- a/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
+++ b/llvm/test/CodeGen/NVPTX/libcall-fulfilled.ll
@@ -3,8 +3,8 @@
 
 ; Allow making libcalls that are defined in the current module
 
-declare i8* @malloc(i64)
-declare void @free(i8*)
+declare ptr @malloc(i64)
+declare void @free(ptr)
 
 ; Underlying libcall declaration
 ; CHECK: .visible .func  (.param .align 16 .b8 func_retval0[16]) __umodti3
@@ -40,8 +40,8 @@ define void @malloc_then_free() {
 ; CHECK:  malloc,
 ; CHECK:  call.uni
 ; CHECK:  free,
-  %a = call i8* @malloc(i64 4)
-  store i8 0, i8* %a
-  call void @free(i8* %a)
+  %a = call ptr @malloc(i64 4)
+  store i8 0, ptr %a
+  call void @free(ptr %a)
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
index e0044404c6626..eb2596bba1b65 100644
--- a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
+++ b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -4,12 +4,12 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 target triple = "nvptx-nvidia-cuda"
 
-define void @main(i1* %a1, i32 %a2, i32* %arg3) {
+define void @main(ptr %a1, i32 %a2, ptr %arg3) {
 ; CHECK: ld.u8
 ; CHECK-NOT: ld.u1
-  %t1 = getelementptr i1, i1* %a1, i32 %a2
-  %t2 = load i1, i1* %t1
+  %t1 = getelementptr i1, ptr %a1, i32 %a2
+  %t2 = load i1, ptr %t1
   %t3 = sext i1 %t2 to i32
-  store i32 %t3, i32* %arg3
+  store i32 %t3, ptr %arg3
   ret void
 }

diff  --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index de61b9ef9deb0..0955b433e0f76 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -2,94 +2,94 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK-LABEL: plain
-define void @plain(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+define void @plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
   ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %a.load = load i8, i8* %a
+  %a.load = load i8, ptr %a
   %a.add = add i8 %a.load, 1
   ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store i8 %a.add, i8* %a
+  store i8 %a.add, ptr %a
 
   ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %b.load = load i16, i16* %b
+  %b.load = load i16, ptr %b
   %b.add = add i16 %b.load, 1
   ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store i16 %b.add, i16* %b
+  store i16 %b.add, ptr %b
 
   ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %c.load = load i32, i32* %c
+  %c.load = load i32, ptr %c
   %c.add = add i32 %c.load, 1
   ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
-  store i32 %c.add, i32* %c
+  store i32 %c.add, ptr %c
 
   ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %d.load = load i64, i64* %d
+  %d.load = load i64, ptr %d
   %d.add = add i64 %d.load, 1
   ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
-  store i64 %d.add, i64* %d
+  store i64 %d.add, ptr %d
 
   ret void
 }
 
 ; CHECK-LABEL: volatile
-define void @volatile(i8* %a, i16* %b, i32* %c, i64* %d) local_unnamed_addr {
+define void @volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
   ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %a.load = load volatile i8, i8* %a
+  %a.load = load volatile i8, ptr %a
   %a.add = add i8 %a.load, 1
   ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store volatile i8 %a.add, i8* %a
+  store volatile i8 %a.add, ptr %a
 
   ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %b.load = load volatile i16, i16* %b
+  %b.load = load volatile i16, ptr %b
   %b.add = add i16 %b.load, 1
   ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store volatile i16 %b.add, i16* %b
+  store volatile i16 %b.add, ptr %b
 
   ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %c.load = load volatile i32, i32* %c
+  %c.load = load volatile i32, ptr %c
   %c.add = add i32 %c.load, 1
   ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
-  store volatile i32 %c.add, i32* %c
+  store volatile i32 %c.add, ptr %c
 
   ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %d.load = load volatile i64, i64* %d
+  %d.load = load volatile i64, ptr %d
   %d.add = add i64 %d.load, 1
   ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
-  store volatile i64 %d.add, i64* %d
+  store volatile i64 %d.add, ptr %d
 
   ret void
 }
 
 ; CHECK-LABEL: monotonic
-define void @monotonic(i8* %a, i16* %b, i32* %c, i64* %d, float* %e) local_unnamed_addr {
+define void @monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
   ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %a.load = load atomic i8, i8* %a monotonic, align 1
+  %a.load = load atomic i8, ptr %a monotonic, align 1
   %a.add = add i8 %a.load, 1
   ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store atomic i8 %a.add, i8* %a monotonic, align 1
+  store atomic i8 %a.add, ptr %a monotonic, align 1
 
   ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %b.load = load atomic i16, i16* %b monotonic, align 2
+  %b.load = load atomic i16, ptr %b monotonic, align 2
   %b.add = add i16 %b.load, 1
   ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
-  store atomic i16 %b.add, i16* %b monotonic, align 2
+  store atomic i16 %b.add, ptr %b monotonic, align 2
 
   ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %c.load = load atomic i32, i32* %c monotonic, align 4
+  %c.load = load atomic i32, ptr %c monotonic, align 4
   %c.add = add i32 %c.load, 1
   ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
-  store atomic i32 %c.add, i32* %c monotonic, align 4
+  store atomic i32 %c.add, ptr %c monotonic, align 4
 
   ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %d.load = load atomic i64, i64* %d monotonic, align 8
+  %d.load = load atomic i64, ptr %d monotonic, align 8
   %d.add = add i64 %d.load, 1
   ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
-  store atomic i64 %d.add, i64* %d monotonic, align 8
+  store atomic i64 %d.add, ptr %d monotonic, align 8
 
   ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
-  %e.load = load atomic float, float* %e monotonic, align 4
+  %e.load = load atomic float, ptr %e monotonic, align 4
   %e.add = fadd float %e.load, 1.0
   ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
-  store atomic float %e.add, float* %e monotonic, align 4
+  store atomic float %e.add, ptr %e monotonic, align 4
 
   ret void
 }

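As the CHECK lines above indicate, monotonic atomic loads and stores are
lowered to volatile accesses on this target. A minimal sketch:

  define i32 @monotonic_load(ptr %p) {
    ; Expected to lower to ld.volatile.u32 on sm_20.
    %v = load atomic i32, ptr %p monotonic, align 4
    ret i32 %v
  }
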
diff  --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
index 6783d7d3467cf..17ea8bf9ca4a3 100644
--- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
+++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
@@ -10,9 +10,9 @@ target triple = "nvptx64-unknown-unknown"
 ; SM20: ld.global.f32
 ; SM35-LABEL: .visible .entry foo1(
 ; SM35: ld.global.nc.f32
-define void @foo1(float * noalias readonly %from, float * %to) {
-  %1 = load float, float * %from
-  store float %1, float * %to
+define void @foo1(ptr noalias readonly %from, ptr %to) {
+  %1 = load float, ptr %from
+  store float %1, ptr %to
   ret void
 }
 
@@ -20,9 +20,9 @@ define void @foo1(float * noalias readonly %from, float * %to) {
 ; SM20: ld.global.f64
 ; SM35-LABEL: .visible .entry foo2(
 ; SM35: ld.global.nc.f64
-define void @foo2(double * noalias readonly %from, double * %to) {
-  %1 = load double, double * %from
-  store double %1, double * %to
+define void @foo2(ptr noalias readonly %from, ptr %to) {
+  %1 = load double, ptr %from
+  store double %1, ptr %to
   ret void
 }
 
@@ -30,9 +30,9 @@ define void @foo2(double * noalias readonly %from, double * %to) {
 ; SM20: ld.global.u16
 ; SM35-LABEL: .visible .entry foo3(
 ; SM35: ld.global.nc.u16
-define void @foo3(i16 * noalias readonly %from, i16 * %to) {
-  %1 = load i16, i16 * %from
-  store i16 %1, i16 * %to
+define void @foo3(ptr noalias readonly %from, ptr %to) {
+  %1 = load i16, ptr %from
+  store i16 %1, ptr %to
   ret void
 }
 
@@ -40,9 +40,9 @@ define void @foo3(i16 * noalias readonly %from, i16 * %to) {
 ; SM20: ld.global.u32
 ; SM35-LABEL: .visible .entry foo4(
 ; SM35: ld.global.nc.u32
-define void @foo4(i32 * noalias readonly %from, i32 * %to) {
-  %1 = load i32, i32 * %from
-  store i32 %1, i32 * %to
+define void @foo4(ptr noalias readonly %from, ptr %to) {
+  %1 = load i32, ptr %from
+  store i32 %1, ptr %to
   ret void
 }
 
@@ -50,9 +50,9 @@ define void @foo4(i32 * noalias readonly %from, i32 * %to) {
 ; SM20: ld.global.u64
 ; SM35-LABEL: .visible .entry foo5(
 ; SM35: ld.global.nc.u64
-define void @foo5(i64 * noalias readonly %from, i64 * %to) {
-  %1 = load i64, i64 * %from
-  store i64 %1, i64 * %to
+define void @foo5(ptr noalias readonly %from, ptr %to) {
+  %1 = load i64, ptr %from
+  store i64 %1, ptr %to
   ret void
 }
 
@@ -63,9 +63,9 @@ define void @foo5(i64 * noalias readonly %from, i64 * %to) {
 ; SM35-LABEL: .visible .entry foo6(
 ; SM35: ld.global.nc.u64
 ; SM35: ld.global.nc.u64
-define void @foo6(i128 * noalias readonly %from, i128 * %to) {
-  %1 = load i128, i128 * %from
-  store i128 %1, i128 * %to
+define void @foo6(ptr noalias readonly %from, ptr %to) {
+  %1 = load i128, ptr %from
+  store i128 %1, ptr %to
   ret void
 }
 
@@ -73,9 +73,9 @@ define void @foo6(i128 * noalias readonly %from, i128 * %to) {
 ; SM20: ld.global.v2.u8
 ; SM35-LABEL: .visible .entry foo7(
 ; SM35: ld.global.nc.v2.u8
-define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) {
-  %1 = load <2 x i8>, <2 x i8> * %from
-  store <2 x i8> %1, <2 x i8> * %to
+define void @foo7(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x i8>, ptr %from
+  store <2 x i8> %1, ptr %to
   ret void
 }
 
@@ -83,9 +83,9 @@ define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) {
 ; SM20: ld.global.v2.u16
 ; SM35-LABEL: .visible .entry foo8(
 ; SM35: ld.global.nc.v2.u16
-define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) {
-  %1 = load <2 x i16>, <2 x i16> * %from
-  store <2 x i16> %1, <2 x i16> * %to
+define void @foo8(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x i16>, ptr %from
+  store <2 x i16> %1, ptr %to
   ret void
 }
 
@@ -93,9 +93,9 @@ define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) {
 ; SM20: ld.global.v2.u32
 ; SM35-LABEL: .visible .entry foo9(
 ; SM35: ld.global.nc.v2.u32
-define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) {
-  %1 = load <2 x i32>, <2 x i32> * %from
-  store <2 x i32> %1, <2 x i32> * %to
+define void @foo9(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x i32>, ptr %from
+  store <2 x i32> %1, ptr %to
   ret void
 }
 
@@ -103,9 +103,9 @@ define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) {
 ; SM20: ld.global.v2.u64
 ; SM35-LABEL: .visible .entry foo10(
 ; SM35: ld.global.nc.v2.u64
-define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) {
-  %1 = load <2 x i64>, <2 x i64> * %from
-  store <2 x i64> %1, <2 x i64> * %to
+define void @foo10(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x i64>, ptr %from
+  store <2 x i64> %1, ptr %to
   ret void
 }
 
@@ -113,9 +113,9 @@ define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) {
 ; SM20: ld.global.v2.f32
 ; SM35-LABEL: .visible .entry foo11(
 ; SM35: ld.global.nc.v2.f32
-define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) {
-  %1 = load <2 x float>, <2 x float> * %from
-  store <2 x float> %1, <2 x float> * %to
+define void @foo11(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x float>, ptr %from
+  store <2 x float> %1, ptr %to
   ret void
 }
 
@@ -123,9 +123,9 @@ define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) {
 ; SM20: ld.global.v2.f64
 ; SM35-LABEL: .visible .entry foo12(
 ; SM35: ld.global.nc.v2.f64
-define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) {
-  %1 = load <2 x double>, <2 x double> * %from
-  store <2 x double> %1, <2 x double> * %to
+define void @foo12(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x double>, ptr %from
+  store <2 x double> %1, ptr %to
   ret void
 }
 
@@ -133,9 +133,9 @@ define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) {
 ; SM20: ld.global.v4.u8
 ; SM35-LABEL: .visible .entry foo13(
 ; SM35: ld.global.nc.v4.u8
-define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) {
-  %1 = load <4 x i8>, <4 x i8> * %from
-  store <4 x i8> %1, <4 x i8> * %to
+define void @foo13(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x i8>, ptr %from
+  store <4 x i8> %1, ptr %to
   ret void
 }
 
@@ -143,9 +143,9 @@ define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) {
 ; SM20: ld.global.v4.u16
 ; SM35-LABEL: .visible .entry foo14(
 ; SM35: ld.global.nc.v4.u16
-define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) {
-  %1 = load <4 x i16>, <4 x i16> * %from
-  store <4 x i16> %1, <4 x i16> * %to
+define void @foo14(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x i16>, ptr %from
+  store <4 x i16> %1, ptr %to
   ret void
 }
 
@@ -153,9 +153,9 @@ define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) {
 ; SM20: ld.global.v4.u32
 ; SM35-LABEL: .visible .entry foo15(
 ; SM35: ld.global.nc.v4.u32
-define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) {
-  %1 = load <4 x i32>, <4 x i32> * %from
-  store <4 x i32> %1, <4 x i32> * %to
+define void @foo15(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x i32>, ptr %from
+  store <4 x i32> %1, ptr %to
   ret void
 }
 
@@ -163,9 +163,9 @@ define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) {
 ; SM20: ld.global.v4.f32
 ; SM35-LABEL: .visible .entry foo16(
 ; SM35: ld.global.nc.v4.f32
-define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) {
-  %1 = load <4 x float>, <4 x float> * %from
-  store <4 x float> %1, <4 x float> * %to
+define void @foo16(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x float>, ptr %from
+  store <4 x float> %1, ptr %to
   ret void
 }
 
@@ -175,9 +175,9 @@ define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) {
 ; SM35-LABEL: .visible .entry foo17(
 ; SM35: ld.global.nc.v2.f64
 ; SM35: ld.global.nc.v2.f64
-define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) {
-  %1 = load <4 x double>, <4 x double> * %from
-  store <4 x double> %1, <4 x double> * %to
+define void @foo17(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x double>, ptr %from
+  store <4 x double> %1, ptr %to
   ret void
 }
 
@@ -185,9 +185,9 @@ define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) {
 ; SM20: ld.global.u64
 ; SM35-LABEL: .visible .entry foo18(
 ; SM35: ld.global.nc.u64
-define void @foo18(float ** noalias readonly %from, float ** %to) {
-  %1 = load float *, float ** %from
-  store float * %1, float ** %to
+define void @foo18(ptr noalias readonly %from, ptr %to) {
+  %1 = load ptr, ptr %from
+  store ptr %1, ptr %to
   ret void
 }
 
@@ -196,22 +196,22 @@ define void @foo18(float ** noalias readonly %from, float ** %to) {
 ; SM20: ld.global.f32
 ; SM35-LABEL: .visible .entry foo19(
 ; SM35: ld.global.nc.f32
-define void @foo19(float * noalias readonly %from, float * %to, i32 %n) {
+define void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
 entry:
   br label %loop
 
 loop:
   %i = phi i32 [ 0, %entry ], [ %nexti, %loop ]
   %sum = phi float [ 0.0, %entry ], [ %nextsum, %loop ]
-  %ptr = getelementptr inbounds float, float * %from, i32 %i
-  %value = load float, float * %ptr, align 4
+  %ptr = getelementptr inbounds float, ptr %from, i32 %i
+  %value = load float, ptr %ptr, align 4
   %nextsum = fadd float %value, %sum
   %nexti = add nsw i32 %i, 1
   %exitcond = icmp eq i32 %nexti, %n
   br i1 %exitcond, label %exit, label %loop
 
 exit:
-  store float %nextsum, float * %to
+  store float %nextsum, ptr %to
   ret void
 }
 
@@ -224,9 +224,9 @@ exit:
 ; SM20: ld.f32
 ; SM35-LABEL: notkernel(
 ; SM35: ld.f32
-define void @notkernel(float * noalias readonly %from, float * %to) {
-  %1 = load float, float * %from
-  store float %1, float * %to
+define void @notkernel(ptr noalias readonly %from, ptr %to) {
+  %1 = load float, ptr %from
+  store float %1, ptr %to
   ret void
 }
 
@@ -238,29 +238,29 @@ define void @notkernel(float * noalias readonly %from, float * %to) {
 ; SM20: ld.global.f32
 ; SM35-LABEL: notkernel2(
 ; SM35: ld.global.f32
-define void @notkernel2(float addrspace(1) * noalias readonly %from, float * %to) {
-  %1 = load float, float addrspace(1) * %from
-  store float %1, float * %to
+define void @notkernel2(ptr addrspace(1) noalias readonly %from, ptr %to) {
+  %1 = load float, ptr addrspace(1) %from
+  store float %1, ptr %to
   ret void
 }
 
 !nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19}
-!1 = !{void (float *, float *)* @foo1, !"kernel", i32 1}
-!2 = !{void (double *, double *)* @foo2, !"kernel", i32 1}
-!3 = !{void (i16 *, i16 *)* @foo3, !"kernel", i32 1}
-!4 = !{void (i32 *, i32 *)* @foo4, !"kernel", i32 1}
-!5 = !{void (i64 *, i64 *)* @foo5, !"kernel", i32 1}
-!6 = !{void (i128 *, i128 *)* @foo6, !"kernel", i32 1}
-!7 = !{void (<2 x i8> *, <2 x i8> *)* @foo7, !"kernel", i32 1}
-!8 = !{void (<2 x i16> *, <2 x i16> *)* @foo8, !"kernel", i32 1}
-!9 = !{void (<2 x i32> *, <2 x i32> *)* @foo9, !"kernel", i32 1}
-!10 = !{void (<2 x i64> *, <2 x i64> *)* @foo10, !"kernel", i32 1}
-!11 = !{void (<2 x float> *, <2 x float> *)* @foo11, !"kernel", i32 1}
-!12 = !{void (<2 x double> *, <2 x double> *)* @foo12, !"kernel", i32 1}
-!13 = !{void (<4 x i8> *, <4 x i8> *)* @foo13, !"kernel", i32 1}
-!14 = !{void (<4 x i16> *, <4 x i16> *)* @foo14, !"kernel", i32 1}
-!15 = !{void (<4 x i32> *, <4 x i32> *)* @foo15, !"kernel", i32 1}
-!16 = !{void (<4 x float> *, <4 x float> *)* @foo16, !"kernel", i32 1}
-!17 = !{void (<4 x double> *, <4 x double> *)* @foo17, !"kernel", i32 1}
-!18 = !{void (float **, float **)* @foo18, !"kernel", i32 1}
-!19 = !{void (float *, float *, i32)* @foo19, !"kernel", i32 1}
+!1 = !{ptr @foo1, !"kernel", i32 1}
+!2 = !{ptr @foo2, !"kernel", i32 1}
+!3 = !{ptr @foo3, !"kernel", i32 1}
+!4 = !{ptr @foo4, !"kernel", i32 1}
+!5 = !{ptr @foo5, !"kernel", i32 1}
+!6 = !{ptr @foo6, !"kernel", i32 1}
+!7 = !{ptr @foo7, !"kernel", i32 1}
+!8 = !{ptr @foo8, !"kernel", i32 1}
+!9 = !{ptr @foo9, !"kernel", i32 1}
+!10 = !{ptr @foo10, !"kernel", i32 1}
+!11 = !{ptr @foo11, !"kernel", i32 1}
+!12 = !{ptr @foo12, !"kernel", i32 1}
+!13 = !{ptr @foo13, !"kernel", i32 1}
+!14 = !{ptr @foo14, !"kernel", i32 1}
+!15 = !{ptr @foo15, !"kernel", i32 1}
+!16 = !{ptr @foo16, !"kernel", i32 1}
+!17 = !{ptr @foo17, !"kernel", i32 1}
+!18 = !{ptr @foo18, !"kernel", i32 1}
+!19 = !{ptr @foo19, !"kernel", i32 1}
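
The annotation rewrite above is the mechanical core of this migration: with opaque pointers a function reference in metadata no longer spells out the function's type, so every !{void (...)* @f, !"kernel", i32 1} entry collapses to !{ptr @f, !"kernel", i32 1}. A minimal before/after sketch (hypothetical kernel @k, not from this patch):

  ; Typed pointers (old):
  !0 = !{void (float*, float*)* @k, !"kernel", i32 1}
  ; Opaque pointers (new):
  !0 = !{ptr @k, !"kernel", i32 1}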

diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 2150a9ed340db..14e6d1c4bb76e 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -15,7 +15,7 @@
 ; PTX64:        st.volatile.u32  [%SP+0], %r{{[0-9]+}};
 define void @foo(i32 %a) {
   %local = alloca i32, align 4
-  store volatile i32 %a, i32* %local
+  store volatile i32 %a, ptr %local
   ret void
 }
 
@@ -31,15 +31,15 @@ define void @foo(i32 %a) {
 ; PTX64:        st.local.u32  [%rd[[SP_REG]]], %r{{[0-9]+}};
 define void @foo2(i32 %a) {
   %local = alloca i32, align 4
-  store i32 %a, i32* %local
-  call void @bar(i32* %local)
+  store i32 %a, ptr %local
+  call void @bar(ptr %local)
   ret void
 }
 
-declare void @bar(i32* %a)
+declare void @bar(ptr %a)
 
 !nvvm.annotations = !{!0}
-!0 = !{void (i32)* @foo2, !"kernel", i32 1}
+!0 = !{ptr @foo2, !"kernel", i32 1}
 
 ; PTX32:        mov.u32          %SPL, __local_depot{{[0-9]+}};
 ; PTX32-NOT:    cvta.local.u32   %SP, %SPL;
@@ -53,9 +53,8 @@ declare void @bar(i32* %a)
 ; PTX64:        st.local.u32  [%rd{{[0-9]+}}], %r{{[0-9]+}};
 define void @foo3(i32 %a) {
   %local = alloca [3 x i32], align 4
-  %1 = bitcast [3 x i32]* %local to i32*
-  %2 = getelementptr inbounds i32, i32* %1, i32 %a
-  store i32 %a, i32* %2
+  %1 = getelementptr inbounds i32, ptr %local, i32 %a
+  store i32 %a, ptr %1
   ret void
 }
 
@@ -76,9 +75,9 @@ define void @foo3(i32 %a) {
 define void @foo4() {
   %A = alloca i32
   %B = alloca i32
-  store i32 0, i32* %A
-  store i32 0, i32* %B
-  call void @bar(i32* %A)
-  call void @bar(i32* %B)
+  store i32 0, ptr %A
+  store i32 0, ptr %B
+  call void @bar(ptr %A)
+  call void @bar(ptr %B)
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/loop-vectorize.ll b/llvm/test/CodeGen/NVPTX/loop-vectorize.ll
index 1b337441ac967..acfcf514760a9 100644
--- a/llvm/test/CodeGen/NVPTX/loop-vectorize.ll
+++ b/llvm/test/CodeGen/NVPTX/loop-vectorize.ll
@@ -36,4 +36,4 @@ declare void @llvm.assume(i1) #0
 attributes #0 = { nounwind }
 
 !nvvm.annotations = !{!0}
-!0 = !{void (i32, i32, i32)* @no_vectorization, !"kernel", i32 1}
+!0 = !{ptr @no_vectorization, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
index 7251d9913cd2b..1818c268f61f3 100644
--- a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
@@ -8,14 +8,14 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "nvptx64-unknown-unknown"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #1
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #1
 
-define i8* @memcpy_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memcpy_caller(ptr %dst, ptr %src, i64 %n) #0 {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
-  ret i8* %dst
+  tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+  ret ptr %dst
 
 ; IR-LABEL:   @memcpy_caller
 ; IR:         entry:
@@ -24,16 +24,16 @@ entry:
 
 ; IR:         loop-memcpy-expansion:
 ; IR:         %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; IR:         [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR:         [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; IR:         [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR:         store i8 [[Load]], i8* [[DstGep]]
+; IR:         [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR:         [[Load:%[0-9]+]] = load i8, ptr [[SrcGep]]
+; IR:         [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR:         store i8 [[Load]], ptr [[DstGep]]
 ; IR:         [[IndexInc]] = add i64 %loop-index, 1
 ; IR:         [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
 ; IR:         br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
 
 ; IR-LABEL:   post-loop-memcpy-expansion:
-; IR:         ret i8* %dst
+; IR:         ret ptr %dst
 
 ; PTX-LABEL:  .visible .func (.param .b64 func_retval0) memcpy_caller
 ; PTX:        $L__BB[[LABEL:[_0-9]+]]:
@@ -45,10 +45,10 @@ entry:
 
 }
 
-define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memcpy_volatile_caller(ptr %dst, ptr %src, i64 %n) #0 {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 true)
-  ret i8* %dst
+  tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 true)
+  ret ptr %dst
 
 ; IR-LABEL:   @memcpy_volatile_caller
 ; IR:         entry:
@@ -57,16 +57,16 @@ entry:
 
 ; IR:         loop-memcpy-expansion:
 ; IR:         %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %loop-memcpy-expansion ]
-; IR:         [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR:         [[Load:%[0-9]+]] = load volatile i8, i8* [[SrcGep]]
-; IR:         [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR:         store volatile i8 [[Load]], i8* [[DstGep]]
+; IR:         [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR:         [[Load:%[0-9]+]] = load volatile i8, ptr [[SrcGep]]
+; IR:         [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR:         store volatile i8 [[Load]], ptr [[DstGep]]
 ; IR:         [[IndexInc]] = add i64 %loop-index, 1
 ; IR:         [[Cond2:%[0-9]+]] = icmp ult i64 [[IndexInc]], %n
 ; IR:         br i1 [[Cond2]], label %loop-memcpy-expansion, label %post-loop-memcpy-expansion
 
 ; IR-LABEL:   post-loop-memcpy-expansion:
-; IR:         ret i8* %dst
+; IR:         ret ptr %dst
 
 
 ; PTX-LABEL:  .visible .func (.param .b64 func_retval0) memcpy_volatile_caller
@@ -78,25 +78,21 @@ entry:
 ; PTX:        @%p[[PRED]] bra $L__BB[[LABEL]]
 }
 
-define i8* @memcpy_casting_caller(i32* %dst, i32* %src, i64 %n) #0 {
+define ptr @memcpy_casting_caller(ptr %dst, ptr %src, i64 %n) #0 {
 entry:
-  %0 = bitcast i32* %dst to i8*
-  %1 = bitcast i32* %src to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 %n, i1 false)
-  ret i8* %0
+  tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+  ret ptr %dst
 
 ; Check that casts in calls to memcpy are handled properly
 ; IR-LABEL:   @memcpy_casting_caller
-; IR:         [[DSTCAST:%[0-9]+]] = bitcast i32* %dst to i8*
-; IR:         [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8*
-; IR:         getelementptr inbounds i8, i8* [[SRCCAST]]
-; IR:         getelementptr inbounds i8, i8* [[DSTCAST]]
+; IR:         getelementptr inbounds i8, ptr %src
+; IR:         getelementptr inbounds i8, ptr %dst
 }
 
-define i8* @memcpy_known_size(i8* %dst, i8* %src) {
+define ptr @memcpy_known_size(ptr %dst, ptr %src) {
 entry:
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 144, i1 false)
-  ret i8* %dst
+  tail call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 144, i1 false)
+  ret ptr %dst
 
 ; Check that calls with compile-time constant size are handled correctly
 ; IR-LABEL:    @memcpy_known_size
@@ -104,28 +100,28 @@ entry:
 ; IR:          br label %load-store-loop
 ; IR:          load-store-loop:
 ; IR:          %loop-index = phi i64 [ 0, %entry ], [ [[IndexInc:%[0-9]+]], %load-store-loop ]
-; IR:          [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 %loop-index
-; IR:          [[Load:%[0-9]+]] = load i8, i8* [[SrcGep]]
-; IR:          [[DstGep:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 %loop-index
-; IR:          store i8 [[Load]], i8* [[DstGep]]
+; IR:          [[SrcGep:%[0-9]+]] = getelementptr inbounds i8, ptr %src, i64 %loop-index
+; IR:          [[Load:%[0-9]+]] = load i8, ptr [[SrcGep]]
+; IR:          [[DstGep:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64 %loop-index
+; IR:          store i8 [[Load]], ptr [[DstGep]]
 ; IR:          [[IndexInc]] = add i64 %loop-index, 1
 ; IR:          [[Cond:%[0-9]+]] = icmp ult i64 %3, 144
 ; IR:          br i1 [[Cond]], label %load-store-loop, label %memcpy-split
 }
 
-define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
+define ptr @memset_caller(ptr %dst, i32 %c, i64 %n) #0 {
 entry:
   %0 = trunc i32 %c to i8
-  tail call void @llvm.memset.p0i8.i64(i8* %dst, i8 %0, i64 %n, i1 false)
-  ret i8* %dst
+  tail call void @llvm.memset.p0.i64(ptr %dst, i8 %0, i64 %n, i1 false)
+  ret ptr %dst
 
 ; IR-LABEL:   @memset_caller
 ; IR:         [[VAL:%[0-9]+]] = trunc i32 %c to i8
 ; IR:         [[CMPREG:%[0-9]+]] = icmp eq i64 0, %n
 ; IR:         br i1 [[CMPREG]], label %split, label %loadstoreloop
 ; IR:         loadstoreloop:
-; IR:         [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
-; IR-NEXT:    store i8 [[VAL]], i8* [[STOREPTR]]
+; IR:         [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64
+; IR-NEXT:    store i8 [[VAL]], ptr [[STOREPTR]]
 
 ; PTX-LABEL:  .visible .func (.param .b64 func_retval0) memset_caller(
 ; PTX:        ld.param.u32 %r[[C:[0-9]+]]
@@ -137,26 +133,26 @@ entry:
 ; PTX:        @%p[[PRED]] bra $L__BB[[LABEL]]
 }
 
-define i8* @volatile_memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
+define ptr @volatile_memset_caller(ptr %dst, i32 %c, i64 %n) #0 {
 entry:
   %0 = trunc i32 %c to i8
-  tail call void @llvm.memset.p0i8.i64(i8* %dst, i8 %0, i64 %n, i1 true)
-  ret i8* %dst
+  tail call void @llvm.memset.p0.i64(ptr %dst, i8 %0, i64 %n, i1 true)
+  ret ptr %dst
 
 ; IR-LABEL:   @volatile_memset_caller
 ; IR:         [[VAL:%[0-9]+]] = trunc i32 %c to i8
 ; IR:         loadstoreloop:
-; IR:         [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
-; IR-NEXT:    store volatile i8 [[VAL]], i8* [[STOREPTR]]
+; IR:         [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, ptr %dst, i64
+; IR-NEXT:    store volatile i8 [[VAL]], ptr [[STOREPTR]]
 }
 
-define i8* @memmove_caller(i8* %dst, i8* %src, i64 %n) #0 {
+define ptr @memmove_caller(ptr %dst, ptr %src, i64 %n) #0 {
 entry:
-  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
-  ret i8* %dst
+  tail call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
+  ret ptr %dst
 
 ; IR-LABEL:   @memmove_caller
-; IR:         icmp ult i8* %src, %dst
+; IR:         icmp ult ptr %src, %dst
 ; IR:         [[PHIVAL:%[0-9a-zA-Z_]+]] = phi i64
 ; IR-NEXT:    %index_ptr = sub i64 [[PHIVAL]], 1
 ; IR:         [[FWDPHIVAL:%[0-9a-zA-Z_]+]] = phi i64
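
The intrinsic renames in this file follow from how overloaded intrinsics are mangled: once pointee types are gone, the pointer suffix encodes only the address space, so .p0i8.p0i8. becomes .p0.p0. A minimal sketch of the new form (hypothetical 16-byte copy, not from this patch):

  declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

  define void @copy16(ptr %dst, ptr %src) {
    ; No bitcasts to i8* are needed; any ptr is accepted directly.
    call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
    ret void
  }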

diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index f32d3bad0e7f8..d52bc74b73ede 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -9,15 +9,15 @@ define void @kernel() {
 ; LABEL: @lower_alloca
 ; PTX-LABEL: .visible .entry kernel(
   %A = alloca i32
-; CHECK: addrspacecast i32* %A to i32 addrspace(5)*
-; CHECK: store i32 0, i32 addrspace(5)* {{%.+}}
+; CHECK: addrspacecast ptr %A to ptr addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
 ; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
-  store i32 0, i32* %A
-  call void @callee(i32* %A)
+  store i32 0, ptr %A
+  call void @callee(ptr %A)
   ret void
 }
 
-declare void @callee(i32*)
+declare void @callee(ptr)
 
 !nvvm.annotations = !{!0}
-!0 = !{void ()* @kernel, !"kernel", i32 1}
+!0 = !{ptr @kernel, !"kernel", i32 1}
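
The CHECK updates here show the opaque spelling of address-space casts: the pointee type disappears, but the address space stays explicit on the ptr. A minimal sketch of what the pass is expected to produce (hypothetical value names, not from this patch):

  %A = alloca i32
  ; Cast the generic pointer into the local (addrspace 5) space.
  %A.local = addrspacecast ptr %A to ptr addrspace(5)
  store i32 0, ptr addrspace(5) %A.local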

diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index 75c1cc55b7879..c2d6d3432680b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -6,30 +6,29 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 %class.outer = type <{ %class.inner, i32, [4 x i8] }>
-%class.inner = type { i32*, i32* }
+%class.inner = type { ptr, ptr }
 
 ; Check that nvptx-lower-args preserves arg alignment
-define void @load_alignment(%class.outer* nocapture readonly byval(%class.outer) align 8 %arg) {
+define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %arg) {
 entry:
-; IR: load %class.outer, %class.outer addrspace(101)*
+; IR: load %class.outer, ptr addrspace(101)
 ; IR-SAME: align 8
 ; PTX: ld.param.u64
 ; PTX-NOT: ld.param.u8
-  %arg.idx = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 0, i32 0
-  %arg.idx.val = load i32*, i32** %arg.idx, align 8
-  %arg.idx1 = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 0, i32 1
-  %arg.idx1.val = load i32*, i32** %arg.idx1, align 8
-  %arg.idx2 = getelementptr %class.outer, %class.outer* %arg, i64 0, i32 1
-  %arg.idx2.val = load i32, i32* %arg.idx2, align 8
-  %arg.idx.val.val = load i32, i32* %arg.idx.val, align 4
+  %arg.idx.val = load ptr, ptr %arg, align 8
+  %arg.idx1 = getelementptr %class.outer, ptr %arg, i64 0, i32 0, i32 1
+  %arg.idx1.val = load ptr, ptr %arg.idx1, align 8
+  %arg.idx2 = getelementptr %class.outer, ptr %arg, i64 0, i32 1
+  %arg.idx2.val = load i32, ptr %arg.idx2, align 8
+  %arg.idx.val.val = load i32, ptr %arg.idx.val, align 4
   %add.i = add nsw i32 %arg.idx.val.val, %arg.idx2.val
-  store i32 %add.i, i32* %arg.idx1.val, align 4
+  store i32 %add.i, ptr %arg.idx1.val, align 4
 
   ; let the pointer escape so we still create a local copy this test uses to
   ; check the load alignment.
-  %tmp = call i32* @escape(i32* nonnull %arg.idx2)
+  %tmp = call ptr @escape(ptr nonnull %arg.idx2)
   ret void
 }
 
 ; Function Attrs: convergent nounwind
-declare dso_local i32* @escape(i32*) local_unnamed_addr
+declare dso_local ptr @escape(ptr) local_unnamed_addr

diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 95dc45a039914..ba24126d1544b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -21,15 +21,15 @@
 ; CHECK: ld.param.u32    [[value:%r[0-9]+]], [%[[param_addr1]]+12];
 ; CHECK: st.global.u32   [[[result_addr_g]]], [[value]];
 ; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @static_offset(i32* nocapture %arg, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
+define dso_local void @static_offset(ptr nocapture %arg, ptr nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
 bb:
   %tmp = icmp eq i32 %arg2, 3
   br i1 %tmp, label %bb3, label %bb6
 
 bb3:                                              ; preds = %bb
-  %tmp4 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 3
-  %tmp5 = load i32, i32* %tmp4, align 4
-  store i32 %tmp5, i32* %arg, align 4
+  %tmp4 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 3
+  %tmp5 = load i32, ptr %tmp4, align 4
+  store i32 %tmp5, ptr %arg, align 4
   br label %bb6
 
 bb6:                                              ; preds = %bb3, %bb
@@ -55,12 +55,12 @@ bb6:                                              ; preds = %bb3, %bb
 ; CHECK: st.global.u32   [[[result_addr_g]]], [[value]];
 
 ; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @dynamic_offset(i32* nocapture %arg, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
+define dso_local void @dynamic_offset(ptr nocapture %arg, ptr nocapture readonly byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #0 {
 bb:
   %tmp = sext i32 %arg2 to i64
-  %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 %tmp
-  %tmp4 = load i32, i32* %tmp3, align 4
-  store i32 %tmp4, i32* %arg, align 4
+  %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp
+  %tmp4 = load i32, ptr %tmp3, align 4
+  store i32 %tmp4, ptr %arg, align 4
   ret void
 }
 
@@ -80,13 +80,12 @@ bb:
 ; CHECK32:     st.global.u8    [{{%r[0-9]+}}], [[value]];
 ;
 ; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @gep_bitcast(i8* nocapture %out,  %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
+define dso_local void @gep_bitcast(ptr nocapture %out,  ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
 bb:
   %n64 = sext i32 %n to i64
-  %gep = getelementptr inbounds %struct.ham, %struct.ham* %in, i64 0, i32 0, i64 %n64
-  %bc = bitcast i32* %gep to i8*
-  %load = load i8, i8* %bc, align 4
-  store i8 %load, i8* %out, align 4
+  %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64
+  %load = load i8, ptr %gep, align 4
+  store i8 %load, ptr %out, align 4
   ret void
 }
 
@@ -106,14 +105,13 @@ bb:
 ; CHECK32:     st.global.u8    [{{%r[0-9]+}}], [[value]];
 ;
 ; Function Attrs: nofree norecurse nounwind willreturn mustprogress
-define dso_local void @gep_bitcast_asc(i8* nocapture %out,  %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
+define dso_local void @gep_bitcast_asc(ptr nocapture %out,  ptr nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
 bb:
   %n64 = sext i32 %n to i64
-  %gep = getelementptr inbounds %struct.ham, %struct.ham* %in, i64 0, i32 0, i64 %n64
-  %bc = bitcast i32* %gep to i8*
-  %asc = addrspacecast i8* %bc to i8 addrspace(101)*
-  %load = load i8, i8 addrspace(101)* %asc, align 4
-  store i8 %load, i8* %out, align 4
+  %gep = getelementptr inbounds %struct.ham, ptr %in, i64 0, i32 0, i64 %n64
+  %asc = addrspacecast ptr %gep to ptr addrspace(101)
+  %load = load i8, ptr addrspace(101) %asc, align 4
+  store i8 %load, ptr %out, align 4
   ret void
 }
 
@@ -141,18 +139,18 @@ bb:
 ; CHECK: st.global.u32   [[[result_addr_g]]], [[value]];
 
 ; Function Attrs: convergent norecurse nounwind mustprogress
-define dso_local void @pointer_escapes(i32* nocapture %arg, %struct.ham* byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #1 {
+define dso_local void @pointer_escapes(ptr nocapture %arg, ptr byval(%struct.ham) align 4 %arg1, i32 %arg2) local_unnamed_addr #1 {
 bb:
   %tmp = sext i32 %arg2 to i64
-  %tmp3 = getelementptr inbounds %struct.ham, %struct.ham* %arg1, i64 0, i32 0, i64 %tmp
-  %tmp4 = load i32, i32* %tmp3, align 4
-  store i32 %tmp4, i32* %arg, align 4
-  %tmp5 = call i32* @escape(i32* nonnull %tmp3) #3
+  %tmp3 = getelementptr inbounds %struct.ham, ptr %arg1, i64 0, i32 0, i64 %tmp
+  %tmp4 = load i32, ptr %tmp3, align 4
+  store i32 %tmp4, ptr %arg, align 4
+  %tmp5 = call ptr @escape(ptr nonnull %tmp3) #3
   ret void
 }
 
 ; Function Attrs: convergent nounwind
-declare dso_local i32* @escape(i32*) local_unnamed_addr
+declare dso_local ptr @escape(ptr) local_unnamed_addr
 
 
 !llvm.module.flags = !{!0, !1, !2}
@@ -161,8 +159,8 @@ declare dso_local i32* @escape(i32*) local_unnamed_addr
 !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 9, i32 1]}
 !1 = !{i32 1, !"wchar_size", i32 4}
 !2 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
-!3 = !{void (i32*, %struct.ham*, i32)* @static_offset, !"kernel", i32 1}
-!4 = !{void (i32*, %struct.ham*, i32)* @dynamic_offset, !"kernel", i32 1}
-!5 = !{void (i32*, %struct.ham*, i32)* @pointer_escapes, !"kernel", i32 1}
-!6 = !{void (i8*, %struct.ham*, i32)* @gep_bitcast, !"kernel", i32 1}
-!7 = !{void (i8*, %struct.ham*, i32)* @gep_bitcast_asc, !"kernel", i32 1}
+!3 = !{ptr @static_offset, !"kernel", i32 1}
+!4 = !{ptr @dynamic_offset, !"kernel", i32 1}
+!5 = !{ptr @pointer_escapes, !"kernel", i32 1}
+!6 = !{ptr @gep_bitcast, !"kernel", i32 1}
+!7 = !{ptr @gep_bitcast_asc, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
index 6f33d2fad7a6e..f8b91c750d56f 100644
--- a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -6,40 +6,40 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; Verify that both %input and %output are converted to global pointers and then
 ; addrspacecast'ed back to the original type.
-define void @kernel(float* %input, float* %output) {
+define void @kernel(ptr %input, ptr %output) {
 ; CHECK-LABEL: .visible .entry kernel(
 ; CHECK: cvta.to.global.u64
 ; CHECK: cvta.to.global.u64
-  %1 = load float, float* %input, align 4
+  %1 = load float, ptr %input, align 4
 ; CHECK: ld.global.f32
-  store float %1, float* %output, align 4
+  store float %1, ptr %output, align 4
 ; CHECK: st.global.f32
   ret void
 }
 
-define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
+define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) {
 ; CHECK-LABEL: .visible .entry kernel2(
 ; CHECK-NOT: cvta.to.global.u64
-  %1 = load float, float addrspace(1)* %input, align 4
+  %1 = load float, ptr addrspace(1) %input, align 4
 ; CHECK: ld.global.f32
-  store float %1, float addrspace(1)* %output, align 4
+  store float %1, ptr addrspace(1) %output, align 4
 ; CHECK: st.global.f32
   ret void
 }
 
-%struct.S = type { i32*, i32* }
+%struct.S = type { ptr, ptr }
 
-define void @ptr_in_byval_kernel(%struct.S* byval(%struct.S) %input, i32* %output) {
+define void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) {
 ; CHECK-LABEL: .visible .entry ptr_in_byval_kernel(
 ; CHECK: ld.param.u64 	%[[optr:rd.*]], [ptr_in_byval_kernel_param_1]
 ; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]];
 ; CHECK: ld.param.u64 	%[[iptr:rd.*]], [ptr_in_byval_kernel_param_0+8]
 ; CHECK: cvta.to.global.u64 %[[iptr_g:.*]], %[[iptr]];
-  %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
-  %b = load i32*, i32** %b_ptr, align 8
-  %v = load i32, i32* %b, align 4
+  %b_ptr = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+  %b = load ptr, ptr %b_ptr, align 8
+  %v = load i32, ptr %b, align 4
 ; CHECK: ld.global.u32 %[[val:.*]], [%[[iptr_g]]]
-  store i32 %v, i32* %output, align 4
+  store i32 %v, ptr %output, align 4
 ; CHECK: st.global.u32 [%[[optr_g]]], %[[val]]
   ret void
 }
@@ -47,20 +47,20 @@ define void @ptr_in_byval_kernel(%struct.S* byval(%struct.S) %input, i32* %outpu
 ; Regular functions lower byval arguments differently. We need to make
 ; sure that we're loading byval argument data using [symbol+offset].
 ; There's also no assumption that all pointers within are in global space.
-define void @ptr_in_byval_func(%struct.S* byval(%struct.S) %input, i32* %output) {
+define void @ptr_in_byval_func(ptr byval(%struct.S) %input, ptr %output) {
 ; CHECK-LABEL: .visible .func ptr_in_byval_func(
 ; CHECK: ld.param.u64 	%[[optr:rd.*]], [ptr_in_byval_func_param_1]
 ; CHECK: ld.param.u64 	%[[iptr:rd.*]], [ptr_in_byval_func_param_0+8]
-  %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
-  %b = load i32*, i32** %b_ptr, align 8
-  %v = load i32, i32* %b, align 4
+  %b_ptr = getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1
+  %b = load ptr, ptr %b_ptr, align 8
+  %v = load i32, ptr %b, align 4
 ; CHECK: ld.u32 %[[val:.*]], [%[[iptr]]]
-  store i32 %v, i32* %output, align 4
+  store i32 %v, ptr %output, align 4
 ; CHECK: st.u32 [%[[optr]]], %[[val]]
   ret void
 }
 
 !nvvm.annotations = !{!0, !1, !2}
-!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
-!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
-!2 = !{void (%struct.S*, i32*)* @ptr_in_byval_kernel, !"kernel", i32 1}
+!0 = !{ptr @kernel, !"kernel", i32 1}
+!1 = !{ptr @kernel2, !"kernel", i32 1}
+!2 = !{ptr @ptr_in_byval_kernel, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/machine-sink.ll b/llvm/test/CodeGen/NVPTX/machine-sink.ll
index e4b2ea83249a0..9269fec0d150f 100644
--- a/llvm/test/CodeGen/NVPTX/machine-sink.ll
+++ b/llvm/test/CodeGen/NVPTX/machine-sink.ll
@@ -15,8 +15,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 define float @post_dominate(float %x, i1 %cond) {
 ; CHECK-LABEL: post_dominate(
 entry:
-  %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
-  %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+  %0 = load float, ptr addrspacecast (ptr addrspace(3) @scalar1 to ptr), align 4
+  %1 = load float, ptr addrspacecast (ptr addrspace(3) @scalar2 to ptr), align 4
 ; CHECK: ld.shared.f32
 ; CHECK: ld.shared.f32
   %2 = fmul float %0, %0

diff --git a/llvm/test/CodeGen/NVPTX/managed.ll b/llvm/test/CodeGen/NVPTX/managed.ll
index 283b1f908696f..2b762017457c3 100644
--- a/llvm/test/CodeGen/NVPTX/managed.ll
+++ b/llvm/test/CodeGen/NVPTX/managed.ll
@@ -12,8 +12,8 @@
 ; CHECK: .extern .global .align 4 .u32 decl_g;
 @decl_g = external addrspace(1) global i32, align 4
 ; CHECK: .extern .global .attribute(.managed) .align 8 .b32 managed_decl_g;
-@managed_decl_g = external addrspace(1) global i32*, align 8
+@managed_decl_g = external addrspace(1) global ptr, align 8
 
 !nvvm.annotations = !{!0, !1}
-!0 = !{i32 addrspace(1)* @managed_g, !"managed", i32 1}
-!1 = !{i32* addrspace(1)* @managed_decl_g, !"managed", i32 1}
+!0 = !{ptr addrspace(1) @managed_g, !"managed", i32 1}
+!1 = !{ptr addrspace(1) @managed_decl_g, !"managed", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/mbarrier.ll b/llvm/test/CodeGen/NVPTX/mbarrier.ll
index ae97f96a2d170..c85131bd5d881 100644
--- a/llvm/test/CodeGen/NVPTX/mbarrier.ll
+++ b/llvm/test/CodeGen/NVPTX/mbarrier.ll
@@ -3,143 +3,143 @@
 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 
-declare void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
-declare void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
+declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
+declare void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)
 
 ; CHECK-LABEL: barrierinit
-define void @barrierinit(i64* %a, i32 %b) {
+define void @barrierinit(ptr %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.init.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.init.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  tail call void @llvm.nvvm.mbarrier.init(i64* %a, i32 %b)
+  tail call void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)
   ret void
 }
 
 ; CHECK-LABEL: barrierinitshared
-define void @barrierinitshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierinitshared(ptr addrspace(3) %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.init.shared.b64 [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.init.shared.b64 [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  tail call void @llvm.nvvm.mbarrier.init.shared(i64 addrspace(3)* %a, i32 %b)
+  tail call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %a, i32 %b)
   ret void
 }
 
-declare void @llvm.nvvm.mbarrier.inval(i64* %a)
-declare void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
+declare void @llvm.nvvm.mbarrier.inval(ptr %a)
+declare void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)
 
 ; CHECK-LABEL: barrierinval
-define void @barrierinval(i64* %a) {
+define void @barrierinval(ptr %a) {
 ; CHECK_PTX32: mbarrier.inval.b64 [%r{{[0-1]+}}];
 ; CHECK_PTX64: mbarrier.inval.b64 [%rd{{[0-1]+}}];
-  tail call void @llvm.nvvm.mbarrier.inval(i64* %a)
+  tail call void @llvm.nvvm.mbarrier.inval(ptr %a)
   ret void
 }
 
 ; CHECK-LABEL: barrierinvalshared
-define void @barrierinvalshared(i64 addrspace(3)* %a) {
+define void @barrierinvalshared(ptr addrspace(3) %a) {
 ; CHECK_PTX32: mbarrier.inval.shared.b64 [%r{{[0-1]+}}];
 ; CHECK_PTX64: mbarrier.inval.shared.b64 [%rd{{[0-1]+}}];
-  tail call void @llvm.nvvm.mbarrier.inval.shared(i64 addrspace(3)* %a)
+  tail call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %a)
   ret void
 }
 
-declare i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
-declare i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+declare i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)
 
 ; CHECK-LABEL: barrierarrive
-define void @barrierarrive(i64* %a) {
+define void @barrierarrive(ptr %a) {
 ; CHECK_PTX32: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
 ; CHECK_PTX64: mbarrier.arrive.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(i64* %a)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive(ptr %a)
   ret void
 }
 
 ; CHECK-LABEL: barrierarriveshared
-define void @barrierarriveshared(i64 addrspace(3)* %a) {
+define void @barrierarriveshared(ptr addrspace(3) %a) {
 ; CHECK_PTX32: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
 ; CHECK_PTX64: mbarrier.arrive.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(i64 addrspace(3)* %a)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %a)
   ret void
 }
 
-declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
-declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)
 
 ; CHECK-LABEL: barrierarrivenoComplete
-define void @barrierarrivenoComplete(i64* %a, i32 %b) {
+define void @barrierarrivenoComplete(ptr %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.arrive.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(i64* %a, i32 %b)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %a, i32 %b)
   ret void
 }
 
 ; CHECK-LABEL: barrierarrivenoCompleteshared
-define void @barrierarrivenoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierarrivenoCompleteshared(ptr addrspace(3) %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.arrive.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %a, i32 %b)
   ret void
 }
 
-declare i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)
 
 ; CHECK-LABEL: barrierarrivedrop
-define void @barrierarrivedrop(i64* %a) {
+define void @barrierarrivedrop(ptr %a) {
 ; CHECK_PTX32: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
 ; CHECK_PTX64: mbarrier.arrive_drop.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(i64* %a)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop(ptr %a)
   ret void
 }
 
 ; CHECK-LABEL: barrierarrivedropshared
-define void @barrierarrivedropshared(i64 addrspace(3)* %a) {
+define void @barrierarrivedropshared(ptr addrspace(3) %a) {
 ; CHECK_PTX32: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}];
 ; CHECK_PTX64: mbarrier.arrive_drop.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}];
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(i64 addrspace(3)* %a)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.shared(ptr addrspace(3) %a)
   ret void
 }
 
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
-declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)
 
 ; CHECK-LABEL: barrierarrivedropnoComplete
-define void @barrierarrivedropnoComplete(i64* %a, i32 %b) {
+define void @barrierarrivedropnoComplete(ptr %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.arrive_drop.noComplete.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(i64* %a, i32 %b)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete(ptr %a, i32 %b)
   ret void
 }
 
 ; CHECK-LABEL: barrierarrivedropnoCompleteshared
-define void @barrierarrivedropnoCompleteshared(i64 addrspace(3)* %a, i32 %b) {
+define void @barrierarrivedropnoCompleteshared(ptr addrspace(3) %a, i32 %b) {
 ; CHECK_PTX32: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%r{{[0-9]+}}], %r{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.arrive_drop.noComplete.shared.b64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}], %r{{[0-9]+}};
-  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(i64 addrspace(3)* %a, i32 %b)
+  %ret = tail call i64 @llvm.nvvm.mbarrier.arrive.drop.noComplete.shared(ptr addrspace(3) %a, i32 %b)
   ret void
 }
 
-declare i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
-declare i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
+declare i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
+declare i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)
 
 ; CHECK-LABEL: barriertestwait
-define void @barriertestwait(i64* %a, i64 %b) {
+define void @barriertestwait(ptr %a, i64 %b) {
 ; CHECK_PTX32: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.test_wait.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
-  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(i64* %a, i64 %b)
+  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait(ptr %a, i64 %b)
   ret void
 }
 
 ; CHECK-LABEL: barriertestwaitshared
-define void @barriertestwaitshared(i64 addrspace(3)* %a, i64 %b) {
+define void @barriertestwaitshared(ptr addrspace(3) %a, i64 %b) {
 ; CHECK_PTX32: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%r{{[0-9]+}}], %rd{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.test_wait.shared.b64 %p{{[0-9]+}}, [%rd{{[0-9]+}}], %rd{{[0-9]+}};
-  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(i64 addrspace(3)* %a, i64 %b)
+  %ret = tail call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %a, i64 %b)
   ret void
 }
 
 declare i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
 
 ; CHECK-LABEL: barrierpendingcount
-define i32 @barrierpendingcount(i64* %a, i64 %b) {
+define i32 @barrierpendingcount(ptr %a, i64 %b) {
 ; CHECK_PTX32: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
 ; CHECK_PTX64: mbarrier.pending_count.b64 %r{{[0-9]+}}, %rd{{[0-9]+}};
   %ret = tail call i32 @llvm.nvvm.mbarrier.pending.count(i64 %b)
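
Note the contrast with the memcpy family above: the llvm.nvvm.mbarrier.* intrinsics are not overloaded on their pointer operand, so the intrinsic names stay fixed and only the parameter types in the declarations change, e.g.:

  ; Same intrinsic name as before; i64* in the signature simply becomes ptr.
  declare void @llvm.nvvm.mbarrier.init(ptr %a, i32 %b)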

diff --git a/llvm/test/CodeGen/NVPTX/minmax-negative.ll b/llvm/test/CodeGen/NVPTX/minmax-negative.ll
index 9567d98e81725..c83fe79816e97 100644
--- a/llvm/test/CodeGen/NVPTX/minmax-negative.ll
+++ b/llvm/test/CodeGen/NVPTX/minmax-negative.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=nvptx -O0 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -O0 | %ptxas-verify %}
 
-define i16 @test1(i16* %sur1) {
+define i16 @test1(ptr %sur1) {
 ; CHECK-NOT: mov.u16 %rs{{[0-9]+}}, 32767
   %_tmp21.i = icmp sle i16 0, 0
   %_tmp22.i = select i1 %_tmp21.i, i16 0, i16 32767
-  store i16 %_tmp22.i, i16* %sur1
+  store i16 %_tmp22.i, ptr %sur1
   ret i16 0
 }

diff --git a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
index 2b2ab8f70d084..7dcd351267a9d 100644
--- a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
+++ b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
@@ -5,40 +5,36 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "nvptx64-nvidia-cuda"
 
 ; CHECK-LABEL: t1
-define <4 x float> @t1(i8* %p1) {
+define <4 x float> @t1(ptr %p1) {
 ; CHECK-NOT: ld.v4
 ; CHECK-NOT: ld.v2
 ; CHECK-NOT: ld.f32
 ; CHECK: ld.u8
-  %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>, <4 x float>* %cast, align 1
+  %r = load <4 x float>, ptr %p1, align 1
   ret <4 x float> %r
 }
 
 ; CHECK-LABEL: t2
-define <4 x float> @t2(i8* %p1) {
+define <4 x float> @t2(ptr %p1) {
 ; CHECK-NOT: ld.v4
 ; CHECK-NOT: ld.v2
 ; CHECK: ld.f32
-  %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>, <4 x float>* %cast, align 4
+  %r = load <4 x float>, ptr %p1, align 4
   ret <4 x float> %r
 }
 
 ; CHECK-LABEL: t3
-define <4 x float> @t3(i8* %p1) {
+define <4 x float> @t3(ptr %p1) {
 ; CHECK-NOT: ld.v4
 ; CHECK: ld.v2
-  %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>, <4 x float>* %cast, align 8
+  %r = load <4 x float>, ptr %p1, align 8
   ret <4 x float> %r
 }
 
 ; CHECK-LABEL: t4
-define <4 x float> @t4(i8* %p1) {
+define <4 x float> @t4(ptr %p1) {
 ; CHECK: ld.v4
-  %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>, <4 x float>* %cast, align 16
+  %r = load <4 x float>, ptr %p1, align 16
   ret <4 x float> %r
 }
 
@@ -50,9 +46,9 @@ define <4 x float> @t4(i8* %p1) {
 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
 ; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
 ; CHECK: ret
-define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> * %to) {
-  %1 = load <1 x half>, <1 x half> * %from , align 1
-  store <1 x half> %1, <1 x half> * %to , align 1
+define void @test_v1halfp0a1(ptr noalias readonly %from, ptr %to) {
+  %1 = load <1 x half>, ptr %from , align 1
+  store <1 x half> %1, ptr %to , align 1
   ret void
 }
 
@@ -68,9 +64,9 @@ define void @test_v1halfp0a1(<1 x half> * noalias readonly %from, <1 x half> * %
 ; CHECK-DAG: ld.u8        [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
 ; CHECK-DAG: st.u8        [%[[TO]]+3],
 ; CHECK: ret
-define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %to) {
-  %1 = load <2 x half>, <2 x half> * %from , align 1
-  store <2 x half> %1, <2 x half> * %to , align 1
+define void @test_v2halfp0a1(ptr noalias readonly %from, ptr %to) {
+  %1 = load <2 x half>, ptr %from , align 1
+  store <2 x half> %1, ptr %to , align 1
   ret void
 }
 
@@ -94,43 +90,43 @@ define void @test_v2halfp0a1(<2 x half> * noalias readonly %from, <2 x half> * %
 ; CHECK-DAG: ld.u8        [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7]
 ; CHECK-DAG: st.u8        [%[[TO]]+7], [[B7]]
 ; CHECK: ret
-define void @test_v4halfp0a1(<4 x half> * noalias readonly %from, <4 x half> * %to) {
-  %1 = load <4 x half>, <4 x half> * %from , align 1
-  store <4 x half> %1, <4 x half> * %to , align 1
+define void @test_v4halfp0a1(ptr noalias readonly %from, ptr %to) {
+  %1 = load <4 x half>, ptr %from , align 1
+  store <4 x half> %1, ptr %to , align 1
   ret void
 }
 
 
 ; CHECK-LABEL: s1
-define void @s1(<4 x float>* %p1, <4 x float> %v) {
+define void @s1(ptr %p1, <4 x float> %v) {
 ; CHECK-NOT: st.v4
 ; CHECK-NOT: st.v2
 ; CHECK-NOT: st.f32
 ; CHECK: st.u8
-  store <4 x float> %v, <4 x float>* %p1, align 1
+  store <4 x float> %v, ptr %p1, align 1
   ret void
 }
 
 ; CHECK-LABEL: s2
-define void @s2(<4 x float>* %p1, <4 x float> %v) {
+define void @s2(ptr %p1, <4 x float> %v) {
 ; CHECK-NOT: st.v4
 ; CHECK-NOT: st.v2
 ; CHECK: st.f32
-  store <4 x float> %v, <4 x float>* %p1, align 4
+  store <4 x float> %v, ptr %p1, align 4
   ret void
 }
 
 ; CHECK-LABEL: s3
-define void @s3(<4 x float>* %p1, <4 x float> %v) {
+define void @s3(ptr %p1, <4 x float> %v) {
 ; CHECK-NOT: st.v4
-  store <4 x float> %v, <4 x float>* %p1, align 8
+  store <4 x float> %v, ptr %p1, align 8
   ret void
 }
 
 ; CHECK-LABEL: s4
-define void @s4(<4 x float>* %p1, <4 x float> %v) {
+define void @s4(ptr %p1, <4 x float> %v) {
 ; CHECK: st.v4
-  store <4 x float> %v, <4 x float>* %p1, align 16
+  store <4 x float> %v, ptr %p1, align 16
   ret void
 }
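
Most of the churn in this file is deleted bitcasts: with a single opaque ptr type, casting between pointee types is meaningless, so the loads and stores use the incoming pointer directly and only the explicit align distinguishes the tests. A condensed sketch of the t1..t4 pattern (hypothetical names, not from this patch):

  ; Typed pointers (old) needed a cast to change the load type:
  ;   %cast = bitcast i8* %p to <4 x float>*
  ;   %r = load <4 x float>, <4 x float>* %cast, align 16
  ; Opaque pointers (new) state the type at the use site:
  %r = load <4 x float>, ptr %p, align 16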
 

diff --git a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
index b695c69cb0e34..9ce910a29b7f0 100644
--- a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
+++ b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
@@ -6,10 +6,10 @@
 
 @"$str" = private addrspace(1) constant [4 x i8] c"str\00"
 
-declare void @str2(i8* %str)
+declare void @str2(ptr %str)
 define void @str1() {
 entry:
 ;; CHECK: mov.u64 %rd{{[0-9]+}}, $str;
-  tail call void @str2(i8* getelementptr ([4 x i8], [4 x i8]* addrspacecast ([4 x i8] addrspace(1)* @"$str" to [4 x i8]*), i64 0, i64 0))
+  tail call void @str2(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
   ret void
 }
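
The call-operand change above is another common fold in this migration: a getelementptr with all-zero indices returns the same pointer value, and with opaque pointers there is no [4 x i8]*-to-i8* decay left to express, so only the addrspacecast survives:

  ; Old: gep to element 0 of the casted array; new: the cast alone.
  tail call void @str2(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))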

diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index ca7fb6eddfe8b..2bc6d4cfa7f6d 100644
--- a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -7,60 +7,58 @@
 ; CHECK-NOT: call void @llvm.nvvm.barrier0
 
 ; Function Attrs: nounwind
-define void @foo(float* %output) #1 {
+define void @foo(ptr %output) #1 {
 entry:
-  %output.addr = alloca float*, align 8
-  store float* %output, float** %output.addr, align 8
-  %0 = load float*, float** %output.addr, align 8
-  %arrayidx = getelementptr inbounds float, float* %0, i64 0
-  %1 = load float, float* %arrayidx, align 4
+  %output.addr = alloca ptr, align 8
+  store ptr %output, ptr %output.addr, align 8
+  %0 = load ptr, ptr %output.addr, align 8
+  %1 = load float, ptr %0, align 4
   %conv = fpext float %1 to double
   %cmp = fcmp olt double %conv, 1.000000e+01
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
-  %2 = load float*, float** %output.addr, align 8
-  %3 = load float, float* %2, align 4
+  %2 = load ptr, ptr %output.addr, align 8
+  %3 = load float, ptr %2, align 4
   %conv1 = fpext float %3 to double
   %add = fadd double %conv1, 1.000000e+00
   %conv2 = fptrunc double %add to float
-  store float %conv2, float* %2, align 4
+  store float %conv2, ptr %2, align 4
   br label %if.end
 
 if.else:                                          ; preds = %entry
-  %4 = load float*, float** %output.addr, align 8
-  %5 = load float, float* %4, align 4
+  %4 = load ptr, ptr %output.addr, align 8
+  %5 = load float, ptr %4, align 4
   %conv3 = fpext float %5 to double
   %add4 = fadd double %conv3, 2.000000e+00
   %conv5 = fptrunc double %add4 to float
-  store float %conv5, float* %4, align 4
+  store float %conv5, ptr %4, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
   call void @llvm.nvvm.barrier0()
-  %6 = load float*, float** %output.addr, align 8
-  %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
-  %7 = load float, float* %arrayidx6, align 4
+  %6 = load ptr, ptr %output.addr, align 8
+  %7 = load float, ptr %6, align 4
   %conv7 = fpext float %7 to double
   %cmp8 = fcmp olt double %conv7, 1.000000e+01
   br i1 %cmp8, label %if.then9, label %if.else13
 
 if.then9:                                         ; preds = %if.end
-  %8 = load float*, float** %output.addr, align 8
-  %9 = load float, float* %8, align 4
+  %8 = load ptr, ptr %output.addr, align 8
+  %9 = load float, ptr %8, align 4
   %conv10 = fpext float %9 to double
   %add11 = fadd double %conv10, 3.000000e+00
   %conv12 = fptrunc double %add11 to float
-  store float %conv12, float* %8, align 4
+  store float %conv12, ptr %8, align 4
   br label %if.end17
 
 if.else13:                                        ; preds = %if.end
-  %10 = load float*, float** %output.addr, align 8
-  %11 = load float, float* %10, align 4
+  %10 = load ptr, ptr %output.addr, align 8
+  %11 = load float, ptr %10, align 4
   %conv14 = fpext float %11 to double
   %add15 = fadd double %conv14, 4.000000e+00
   %conv16 = fptrunc double %add15 to float
-  store float %conv16, float* %10, align 4
+  store float %conv16, ptr %10, align 4
   br label %if.end17
 
 if.end17:                                         ; preds = %if.else13, %if.then9
@@ -70,5 +68,5 @@ if.end17:                                         ; preds = %if.else13, %if.then
 ; Function Attrs: noduplicate nounwind
 declare void @llvm.nvvm.barrier0() #2
 
-!0 = !{void (float*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}
 !1 = !{null, !"align", i32 8}

diff --git a/llvm/test/CodeGen/NVPTX/nofunc.ll b/llvm/test/CodeGen/NVPTX/nofunc.ll
index 9ae97993fd255..33b26210165a9 100644
--- a/llvm/test/CodeGen/NVPTX/nofunc.ll
+++ b/llvm/test/CodeGen/NVPTX/nofunc.ll
@@ -10,8 +10,8 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"
 
 @Funcs = local_unnamed_addr addrspace(1) externally_initialized
-         global [1 x void (i8*)*] [void (i8*)* @func], align 8
+         global [1 x ptr] [ptr @func], align 8
 
-declare void @func(i8*)
+declare void @func(ptr)
 
 ; CHECK: Funcs[1] = {func}

diff --git a/llvm/test/CodeGen/NVPTX/nounroll.ll b/llvm/test/CodeGen/NVPTX/nounroll.ll
index 8a42e0f3d0dce..abcaa6843e40a 100644
--- a/llvm/test/CodeGen/NVPTX/nounroll.ll
+++ b/llvm/test/CodeGen/NVPTX/nounroll.ll
@@ -9,7 +9,7 @@ target triple = "nvptx64-unknown-unknown"
 ;   #pragma nounroll
 ;   for (int i = 0; i < 2; ++i)
 ;     output[i] = input[i];
-define void @nounroll(float* %input, float* %output) {
+define void @nounroll(ptr %input, ptr %output) {
 ; CHECK-LABEL: .visible .func nounroll(
 entry:
   br label %for.body
@@ -18,11 +18,11 @@ for.body:
 ; CHECK: .pragma "nounroll"
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.06 to i64
-  %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %input, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
 ; CHECK: ld.f32
-  %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %output, i64 %idxprom
+  store float %0, ptr %arrayidx2, align 4
 ; CHECK: st.f32
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, 2
@@ -39,7 +39,7 @@ for.end:
 ;   #pragma unroll 1
 ;   for (int i = 0; i < 2; ++i)
 ;     output[i] = input[i];
-define void @unroll1(float* %input, float* %output) {
+define void @unroll1(ptr %input, ptr %output) {
 ; CHECK-LABEL: .visible .func unroll1(
 entry:
   br label %for.body
@@ -48,11 +48,11 @@ for.body:
 ; CHECK: .pragma "nounroll"
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.06 to i64
-  %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds float, ptr %input, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
 ; CHECK: ld.f32
-  %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
-  store float %0, float* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %output, i64 %idxprom
+  store float %0, ptr %arrayidx2, align 4
 ; CHECK: st.f32
   %inc = add nuw nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, 2

diff --git a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
index 702f4c1840569..12f9ba38f5e3b 100644
--- a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll
@@ -3,7 +3,7 @@
 
 target triple = "nvptx-unknown-nvcl"
 
-define void @foo(i64 %img, i64 %sampler, <5 x float>* align 32 %v1, i32* %v2) {
+define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) {
 ; The parameter alignment is determined by the align attribute (default 1).
 ; CHECK-LABEL: .entry foo(
 ; CHECK: .param .u32 .ptr .align 32 foo_param_2
@@ -12,6 +12,6 @@ define void @foo(i64 %img, i64 %sampler, <5 x float>* align 32 %v1, i32* %v2) {
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"rdoimage", i32 0}
-!3 = !{void (i64, i64, <5 x float>*, i32*)* @foo, !"sampler", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdoimage", i32 0}
+!3 = !{ptr @foo, !"sampler", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
index 7e7cde8a90f39..c4be3b0ab8a32 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll
@@ -8,12 +8,12 @@
 
 @"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
 
-declare i32 @__nvvm_reflect(i8*)
+declare i32 @__nvvm_reflect(ptr)
 
 ; COMMON-LABEL: @foo
 define i32 @foo(float %a, float %b) {
 ; COMMON-NOT: call i32 @__nvvm_reflect
-  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+  %reflect = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
 ; SM20: ret i32 200  
 ; SM35: ret i32 350  
   ret i32 %reflect
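
Note how the __nvvm_reflect argument shrinks: the zero-index constant GEP existed only
to produce an i8* from the typed array global, so once pointers are opaque it folds
away and the global is passed through the addrspacecast directly (both lines taken
from the hunk above):

  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
  %reflect = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))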

diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
index 845001bfef875..fffbe0cfd7584 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-module-flag.ll
@@ -1,11 +1,11 @@
 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s
 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -passes=nvvm-reflect | FileCheck %s
 
-declare i32 @__nvvm_reflect(i8*)
+declare i32 @__nvvm_reflect(ptr)
 @str = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
 
 define i32 @foo() {
-  %call = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+  %call = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @str to ptr))
   ; CHECK: ret i32 42
   ret i32 %call
 }

diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
index ec0c787852cb1..c482ac087c410 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -13,14 +13,14 @@
 
 @str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
 
-declare i32 @__nvvm_reflect(i8*)
-declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+declare i32 @__nvvm_reflect(ptr)
+declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
 
 ; CHECK-LABEL: @foo
 define float @foo(float %a, float %b) {
 ; CHECK-NOT: call i32 @__nvvm_reflect
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
-  %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+  %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
+  %reflect = tail call i32 @__nvvm_reflect(ptr %ptr)
   %cmp = icmp ugt i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
 
@@ -41,15 +41,15 @@ exit:
   ret float %ret
 }
 
-declare i32 @llvm.nvvm.reflect.p0i8(i8*)
+declare i32 @llvm.nvvm.reflect.p0(ptr)
 
 ; CHECK-LABEL: define i32 @intrinsic
 define i32 @intrinsic() {
 ; CHECK-NOT: call i32 @llvm.nvvm.reflect
 ; USE_FTZ_0: ret i32 0
 ; USE_FTZ_1: ret i32 1
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
-  %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
+  %ptr = tail call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) @str)
+  %reflect = tail call i32 @llvm.nvvm.reflect.p0(ptr %ptr)
   ret i32 %reflect
 }
 
@@ -61,7 +61,7 @@ define i32 @intrinsic() {
 ; CHECK-LABEL: @bar
 define float @bar(float %a, float %b) {
 ; CHECK-NOT: call i32 @__nvvm_reflect
-  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+  %reflect = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
   %cmp = icmp ne i32 %reflect, 0
   br i1 %cmp, label %use_mul, label %use_add
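
The intrinsic renames in this file follow the usual mangling rule: once the pointee
type is gone, pointer parameters are mangled by address space alone, so the p0i8/p4i8
suffixes collapse to p0/p4:

  declare i32 @llvm.nvvm.reflect.p0i8(i8*)                                ; typed
  declare i32 @llvm.nvvm.reflect.p0(ptr)                                  ; opaque
  declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)  ; typed
  declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))      ; opaque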
 

diff --git a/llvm/test/CodeGen/NVPTX/packed-aggr.ll b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
index dba888a87e483..e741a92a2c983 100644
--- a/llvm/test/CodeGen/NVPTX/packed-aggr.ll
+++ b/llvm/test/CodeGen/NVPTX/packed-aggr.ll
@@ -16,22 +16,22 @@ declare void @func()
 ; CHECK: .extern .func func
 ; CHECK: .u8 p;
 
-%t1 = type <{ i16, i8*, i8, void ()*, i8*, i32 }>
+%t1 = type <{ i16, ptr, i8, ptr, ptr, i32 }>
 @s1 = addrspace(1) global %t1 <{
 ; ERROR: initialized packed aggregate with pointers 's1' requires at least PTX ISA version 7.1
 ; CHECK32: .global .align 1 .u8 s1[19] = {
 ; CHECK64: .global .align 1 .u8 s1[31] = {
     i16 12,
 ; CHECK-SAME:   12, 0,
-    i8* addrspacecast (i8 addrspace(1)* @p to i8*),
+    ptr addrspacecast (ptr addrspace(1) @p to ptr),
 ; CHECK-SAME:   0xFF(generic(p)), 0xFF00(generic(p)), 0xFF0000(generic(p)), 0xFF000000(generic(p)),
 ; CHECK64-SAME: 0xFF00000000(generic(p)), 0xFF0000000000(generic(p)), 0xFF000000000000(generic(p)), 0xFF00000000000000(generic(p)),
     i8 34,
 ; CHECK-SAME:   34
-    void ()* @func,
+    ptr @func,
 ; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
 ; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
-    i8* addrspacecast (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* @p, i32 3) to i8*),
+    ptr addrspacecast (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @p, i32 3) to ptr),
 ; CHECK-SAME:   0xFF(generic(p)+3), 0xFF00(generic(p)+3), 0xFF0000(generic(p)+3), 0xFF000000(generic(p)+3),
 ; CHECK64-SAME: 0xFF00000000(generic(p)+3), 0xFF0000000000(generic(p)+3), 0xFF000000000000(generic(p)+3), 0xFF00000000000000(generic(p)+3),
     i32 56 }>, align 1
@@ -39,14 +39,14 @@ declare void @func()
 
 ;; Test a case that an unaligned pointer is in a nested struct.
 
-%t2i = type <{ void ()* }>
+%t2i = type <{ ptr }>
 %t2o = type { i8, %t2i, i32 }
 @s2 = addrspace(1) global %t2o {
 ; CHECK32: .global .align 8 .u8 s2[12] = {
 ; CHECK64: .global .align 8 .u8 s2[16] = {
     i8 12,
 ; CHECK-SAME:   12,
-    %t2i <{ void()* @func }>,
+    %t2i <{ ptr @func }>,
 ; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
 ; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
     i32 34}
@@ -57,11 +57,11 @@ declare void @func()
 ;; is printed in bytes and uses the mask() operator for pointers even though
 ;; the pointers are aligned.
 
-%t3 = type <{ void ()*, i8 }>
+%t3 = type <{ ptr, i8 }>
 @s3 = addrspace(1) global %t3 <{
 ; CHECK32: .global .align 1 .u8 s3[5] = {
 ; CHECK64: .global .align 1 .u8 s3[9] = {
-    void ()* @func,
+    ptr @func,
 ; CHECK-SAME:   0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func),
 ; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func),
     i8 56 }>, align 1
@@ -69,11 +69,11 @@ declare void @func()
 
 ;; Test that a packed struct with aligned pointers is printed in words.
 
-%t4 = type <{ void ()*, i64 }>
+%t4 = type <{ ptr, i64 }>
 @s4 = addrspace(1) global %t4 <{
 ; CHECK32: .global .align 1 .u32 s4[3] = {
 ; CHECK64: .global .align 1 .u64 s4[2] = {
-    void()* @func,
+    ptr @func,
 ; CHECK-SAME:   func,
     i64 15}>, align 1
 ; CHECK32-SAME: 15, 0};
@@ -81,8 +81,8 @@ declare void @func()
 
 ;; Test that a packed struct with unaligned pointers inside an array is handled.
 
-%t5 = type <{ void ()*, i16 }>
-@a5 = addrspace(1) global [2 x %t5] [%t5 <{ void()* @func, i16 5 }>, %t5 <{ void()* @func, i16 9 }> ]
+%t5 = type <{ ptr, i16 }>
+@a5 = addrspace(1) global [2 x %t5] [%t5 <{ ptr @func, i16 5 }>, %t5 <{ ptr @func, i16 9 }> ]
 ; CHECK32: .global .align 8 .u8 a5[12] = {
 ; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 5, 0,
 ; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), 9, 0};

diff --git a/llvm/test/CodeGen/NVPTX/param-align.ll b/llvm/test/CodeGen/NVPTX/param-align.ll
index 40a523f819493..022a7502f0f7d 100644
--- a/llvm/test/CodeGen/NVPTX/param-align.ll
+++ b/llvm/test/CodeGen/NVPTX/param-align.ll
@@ -1,16 +1,16 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 
-;;; Need 4-byte alignment on float* passed byval
-define ptx_device void @t1(float* byval(float) %x) {
+;;; Need 4-byte alignment on ptr passed byval
+define ptx_device void @t1(ptr byval(float) %x) {
 ; CHECK: .func t1
 ; CHECK: .param .align 4 .b8 t1_param_0[4]
   ret void
 }
 
 
-;;; Need 8-byte alignment on double* passed byval
-define ptx_device void @t2(double* byval(double) %x) {
+;;; Need 8-byte alignment on ptr passed byval
+define ptx_device void @t2(ptr byval(double) %x) {
 ; CHECK: .func t2
 ; CHECK: .param .align 8 .b8 t2_param_0[8]
   ret void
@@ -19,7 +19,7 @@ define ptx_device void @t2(double* byval(double) %x) {
 
 ;;; Need 4-byte alignment on float2* passed byval
 %struct.float2 = type { float, float }
-define ptx_device void @t3(%struct.float2* byval(%struct.float2) %x) {
+define ptx_device void @t3(ptr byval(%struct.float2) %x) {
 ; CHECK: .func t3
 ; CHECK: .param .align 4 .b8 t3_param_0[8]
   ret void
@@ -27,19 +27,19 @@ define ptx_device void @t3(%struct.float2* byval(%struct.float2) %x) {
 
 ;;; Need at least 4-byte alignment in order to avoid miscompilation by
 ;;; ptxas for sm_50+
-define ptx_device void @t4(i8* byval(i8) %x) {
+define ptx_device void @t4(ptr byval(i8) %x) {
 ; CHECK: .func t4
 ; CHECK: .param .align 4 .b8 t4_param_0[1]
   ret void
 }
 
 ;;; Make sure we adjust alignment at the call site as well.
-define ptx_device void @t5(i8* align 2 byval(i8) %x) {
+define ptx_device void @t5(ptr align 2 byval(i8) %x) {
 ; CHECK: .func t5
 ; CHECK: .param .align 4 .b8 t5_param_0[1]
 ; CHECK: {
 ; CHECK: .param .align 4 .b8 param0[1];
 ; CHECK: call.uni
-  call void @t4(i8* byval(i8) %x)
+  call void @t4(ptr byval(i8) %x)
   ret void
 }
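
Worth noting for byval parameters: the pointee type used to be stated twice, once by
the pointer type and once by the byval attribute. Under opaque pointers the attribute
is the only remaining source of the element type, which is what still lets llc size
and align the parameter:

  define ptx_device void @t1(float* byval(float) %x)   ; typed: type stated twice
  define ptx_device void @t1(ptr byval(float) %x)      ; opaque: byval(float) supplies it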

diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
index a6ca645193441..374ab67dc46ca 100644
--- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -3,17 +3,17 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
-define ptx_kernel void @t1(i1* %a) {
+define ptx_kernel void @t1(ptr %a) {
 ; PTX32:      mov.u16 %rs{{[0-9]+}}, 0;
 ; PTX32-NEXT: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
 ; PTX64:      mov.u16 %rs{{[0-9]+}}, 0;
 ; PTX64-NEXT: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
-  store i1 false, i1* %a
+  store i1 false, ptr %a
   ret void
 }
 
 
-define ptx_kernel void @t2(i1* %a, i8* %b) {
+define ptx_kernel void @t2(ptr %a, ptr %b) {
 ; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
 ; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 ; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
@@ -21,8 +21,8 @@ define ptx_kernel void @t2(i1* %a, i8* %b) {
 ; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 ; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 
-  %t1 = load i1, i1* %a
+  %t1 = load i1, ptr %a
   %t2 = select i1 %t1, i8 1, i8 2
-  store i8 %t2, i8* %b
+  store i8 %t2, ptr %b
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/pr16278.ll b/llvm/test/CodeGen/NVPTX/pr16278.ll
index 044f8b3a0af50..da1d81a6ad487 100644
--- a/llvm/test/CodeGen/NVPTX/pr16278.ll
+++ b/llvm/test/CodeGen/NVPTX/pr16278.ll
@@ -5,6 +5,6 @@
 
 define float @foo() {
 ; CHECK: ld.const.f32
-  %val = load float, float addrspace(4)* @one_f
+  %val = load float, ptr addrspace(4) @one_f
   ret float %val
 }

diff --git a/llvm/test/CodeGen/NVPTX/pr17529.ll b/llvm/test/CodeGen/NVPTX/pr17529.ll
index eb332bc8bcf7a..6919c00c7c52b 100644
--- a/llvm/test/CodeGen/NVPTX/pr17529.ll
+++ b/llvm/test/CodeGen/NVPTX/pr17529.ll
@@ -6,15 +6,14 @@ target triple = "nvptx64-nvidia-cuda"
 
 ; Function Attrs: nounwind
 ; CHECK: .func kernelgen_memcpy
-define ptx_device void @kernelgen_memcpy(i8* nocapture %dst) #0 {
+define ptx_device void @kernelgen_memcpy(ptr nocapture %dst) #0 {
 entry:
   br i1 undef, label %for.end, label %vector.body
 
 vector.body:                                      ; preds = %vector.body, %entry
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
-  %scevgep9 = getelementptr i8, i8* %dst, i64 %index
-  %scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
-  store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
+  %scevgep9 = getelementptr i8, ptr %dst, i64 %index
+  store <4 x i8> undef, ptr %scevgep9, align 1
   %index.next = add i64 %index, 4
   %0 = icmp eq i64 undef, %index.next
   br i1 %0, label %middle.block, label %vector.body
@@ -23,13 +22,12 @@ middle.block:                                     ; preds = %vector.body
   br i1 undef, label %for.end, label %for.body.preheader1
 
 for.body.preheader1:                              ; preds = %middle.block
-  %scevgep2 = getelementptr i8, i8* %dst, i64 0
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.preheader1
-  %lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
-  store i8 undef, i8* %lsr.iv3, align 1
-  %scevgep4 = getelementptr i8, i8* %lsr.iv3, i64 1
+  %lsr.iv3 = phi ptr [ %dst, %for.body.preheader1 ], [ %scevgep4, %for.body ]
+  store i8 undef, ptr %lsr.iv3, align 1
+  %scevgep4 = getelementptr i8, ptr %lsr.iv3, i64 1
   br label %for.body
 
 for.end:                                          ; preds = %middle.block, %entry
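
This test also shows the other common simplification: no-op bitcasts between pointer
types vanish, and memory instructions use the ptr value directly (pair taken from the
first hunk above):

  %scevgep9 = getelementptr i8, i8* %dst, i64 %index
  %scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
  store <4 x i8> undef, <4 x i8>* %scevgep910, align 1

becomes

  %scevgep9 = getelementptr i8, ptr %dst, i64 %index
  store <4 x i8> undef, ptr %scevgep9, align 1

with the value type now coming from the store operand itself.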

diff --git a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll
index b07b36c779300..b15fe0950e773 100644
--- a/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll
+++ b/llvm/test/CodeGen/NVPTX/read-global-variable-constant.ll
@@ -11,20 +11,20 @@
 ; CHECK-LABEL: test_gv_float()
 define float @test_gv_float() {
 ; CHECK: ld.global.nc.f32
-  %v = load float, float* @gv_float
+  %v = load float, ptr @gv_float
   ret float %v
 }
 
 ; CHECK-LABEL: test_gv_float2()
 define <2 x float> @test_gv_float2() {
 ; CHECK: ld.global.nc.v2.f32
-  %v = load <2 x float>, <2 x float>* @gv_float2
+  %v = load <2 x float>, ptr @gv_float2
   ret <2 x float> %v
 }
 
 ; CHECK-LABEL: test_gv_float4()
 define <4 x float> @test_gv_float4() {
 ; CHECK: ld.global.nc.v4.f32
-  %v = load <4 x float>, <4 x float>* @gv_float4
+  %v = load <4 x float>, ptr @gv_float4
   ret <4 x float> %v
 }

diff --git a/llvm/test/CodeGen/NVPTX/refl1.ll b/llvm/test/CodeGen/NVPTX/refl1.ll
index 222c479832cf1..5010e5c37d3c0 100644
--- a/llvm/test/CodeGen/NVPTX/refl1.ll
+++ b/llvm/test/CodeGen/NVPTX/refl1.ll
@@ -5,10 +5,10 @@ target triple = "nvptx-nvidia-cuda"
 
 ; Function Attrs: nounwind
 ; CHECK: .entry foo
-define void @foo(float* nocapture %a) #0 {
-  %val = load float, float* %a
+define void @foo(ptr nocapture %a) #0 {
+  %val = load float, ptr %a
   %tan = tail call fastcc float @__nv_fast_tanf(float %val)
-  store float %tan, float* %a
+  store float %tan, ptr %a
   ret void
 }
 
@@ -37,4 +37,4 @@ attributes #2 = { alwaysinline inlinehint nounwind readnone }
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (float*)* @foo, !"kernel", i32 1}
+!0 = !{ptr @foo, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/reg-copy.ll b/llvm/test/CodeGen/NVPTX/reg-copy.ll
index 2f096f95881d0..2cf4c9858af27 100644
--- a/llvm/test/CodeGen/NVPTX/reg-copy.ll
+++ b/llvm/test/CodeGen/NVPTX/reg-copy.ll
@@ -4,19 +4,19 @@
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-unknown-unknown"
 
-define void @PR24303(float* %f) {
+define void @PR24303(ptr %f) {
 ; CHECK-LABEL: .visible .entry PR24303(
 ; Do not use mov.f or mov.u to convert between float and int.
 ; CHECK-NOT: mov.{{f|u}}{{32|64}} %f{{[0-9]+}}, %r{{[0-9]+}}
 ; CHECK-NOT: mov.{{f|u}}{{32|64}} %r{{[0-9]+}}, %f{{[0-9]+}}
 entry:
-  %arrayidx1 = getelementptr inbounds float, float* %f, i64 1
-  %0 = load float, float* %f, align 4
-  %1 = load float, float* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %f, i64 2
-  %arrayidx3 = getelementptr inbounds float, float* %f, i64 3
-  %2 = load float, float* %arrayidx2, align 4
-  %3 = load float, float* %arrayidx3, align 4
+  %arrayidx1 = getelementptr inbounds float, ptr %f, i64 1
+  %0 = load float, ptr %f, align 4
+  %1 = load float, ptr %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %f, i64 2
+  %arrayidx3 = getelementptr inbounds float, ptr %f, i64 3
+  %2 = load float, ptr %arrayidx2, align 4
+  %3 = load float, ptr %arrayidx3, align 4
   %mul.i = fmul float %0, %2
   %mul4.i = fmul float %1, %3
   %mul5.i = fmul float %0, %3
@@ -209,12 +209,10 @@ if.then.93.i:                                     ; preds = %if.then.88.i, %if.e
 _ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit: ; preds = %if.then.93.i, %lor.lhs.false.67.i, %land.lhs.true.i, %entry
   %84 = phi i32 [ %4, %land.lhs.true.i ], [ %4, %entry ], [ %82, %if.then.93.i ], [ %4, %lor.lhs.false.67.i ]
   %85 = phi i32 [ %5, %land.lhs.true.i ], [ %5, %entry ], [ %83, %if.then.93.i ], [ %5, %lor.lhs.false.67.i ]
-  %arrayidx5 = getelementptr inbounds float, float* %f, i64 5
-  %86 = bitcast float* %arrayidx5 to i32*
-  store i32 %84, i32* %86, align 4
-  %arrayidx7 = getelementptr inbounds float, float* %f, i64 6
-  %87 = bitcast float* %arrayidx7 to i32*
-  store i32 %85, i32* %87, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %f, i64 5
+  store i32 %84, ptr %arrayidx5, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %f, i64 6
+  store i32 %85, ptr %arrayidx7, align 4
   ret void
 }
 
@@ -222,4 +220,4 @@ declare float @llvm.nvvm.fabs.f(float)
 
 !nvvm.annotations = !{!0}
 
-!0 = !{void (float*)* @PR24303, !"kernel", i32 1}
+!0 = !{ptr @PR24303, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/reg-types.ll b/llvm/test/CodeGen/NVPTX/reg-types.ll
index 41effcb8011b1..54ed66b0d9508 100644
--- a/llvm/test/CodeGen/NVPTX/reg-types.ll
+++ b/llvm/test/CodeGen/NVPTX/reg-types.ll
@@ -30,39 +30,39 @@ entry:
 ; CHECK-DAG: .reg .f64 %fd<
 
 ; Verify that we use correct register types.
-  store i8 1, i8* %s8, align 1
+  store i8 1, ptr %s8, align 1
 ; CHECK: mov.u16 [[R1:%rs[0-9]]], 1;
 ; CHECK-NEXT: st.u8 {{.*}}, [[R1]]
-  store i8 2, i8* %u8, align 1
+  store i8 2, ptr %u8, align 1
 ; CHECK: mov.u16 [[R2:%rs[0-9]]], 2;
 ; CHECK-NEXT: st.u8 {{.*}}, [[R2]]
-  store i16 3, i16* %s16, align 2
+  store i16 3, ptr %s16, align 2
 ; CHECK: mov.u16 [[R3:%rs[0-9]]], 3;
 ; CHECK-NEXT: st.u16 {{.*}}, [[R3]]
-  store i16 4, i16* %u16, align 2
+  store i16 4, ptr %u16, align 2
 ; CHECK: mov.u16 [[R4:%rs[0-9]]], 4;
 ; CHECK-NEXT: st.u16 {{.*}}, [[R4]]
-  store i32 5, i32* %s32, align 4
+  store i32 5, ptr %s32, align 4
 ; CHECK: mov.u32 [[R5:%r[0-9]]], 5;
 ; CHECK-NEXT: st.u32 {{.*}}, [[R5]]
-  store i32 6, i32* %u32, align 4
+  store i32 6, ptr %u32, align 4
 ; CHECK: mov.u32 [[R6:%r[0-9]]], 6;
 ; CHECK-NEXT: st.u32 {{.*}}, [[R6]]
-  store i64 7, i64* %s64, align 8
+  store i64 7, ptr %s64, align 8
 ; CHECK: mov.u64 [[R7:%rd[0-9]]], 7;
 ; CHECK-NEXT: st.u64 {{.*}}, [[R7]]
-  store i64 8, i64* %u64, align 8
+  store i64 8, ptr %u64, align 8
 ; CHECK: mov.u64 [[R8:%rd[0-9]]], 8;
 ; CHECK-NEXT: st.u64 {{.*}}, [[R8]]
 
 ; FP constants are stored via integer registers, but that's an
 ; implementation detail that's irrelevant here.
-  store float 9.000000e+00, float* %f32, align 4
-  store double 1.000000e+01, double* %f64, align 8
+  store float 9.000000e+00, ptr %f32, align 4
+  store double 1.000000e+01, ptr %f64, align 8
 ; Instead, we force a load into a register and then verify register type.
-  %f32v = load volatile float, float* %f32, align 4
+  %f32v = load volatile float, ptr %f32, align 4
 ; CHECK: ld.volatile.f32         %f{{[0-9]+}}
-  %f64v = load volatile double, double* %f64, align 8
+  %f64v = load volatile double, ptr %f64, align 8
 ; CHECK: ld.volatile.f64         %fd{{[0-9]+}}
   ret void
 ; CHECK: ret;

diff --git a/llvm/test/CodeGen/NVPTX/sched1.ll b/llvm/test/CodeGen/NVPTX/sched1.ll
index 9eddc662cbbdc..a25888ac729da 100644
--- a/llvm/test/CodeGen/NVPTX/sched1.ll
+++ b/llvm/test/CodeGen/NVPTX/sched1.ll
@@ -3,7 +3,7 @@
 
 ; Ensure source scheduling is working
 
-define void @foo(i32* %a) {
+define void @foo(ptr %a) {
 ; CHECK: .func foo
 ; CHECK: ld.u32
 ; CHECK-NEXT: ld.u32
@@ -12,20 +12,19 @@ define void @foo(i32* %a) {
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
-  %ptr0 = getelementptr i32, i32* %a, i32 0
-  %val0 = load i32, i32* %ptr0
-  %ptr1 = getelementptr i32, i32* %a, i32 1
-  %val1 = load i32, i32* %ptr1
-  %ptr2 = getelementptr i32, i32* %a, i32 2
-  %val2 = load i32, i32* %ptr2
-  %ptr3 = getelementptr i32, i32* %a, i32 3
-  %val3 = load i32, i32* %ptr3
+  %val0 = load i32, ptr %a
+  %ptr1 = getelementptr i32, ptr %a, i32 1
+  %val1 = load i32, ptr %ptr1
+  %ptr2 = getelementptr i32, ptr %a, i32 2
+  %val2 = load i32, ptr %ptr2
+  %ptr3 = getelementptr i32, ptr %a, i32 3
+  %val3 = load i32, ptr %ptr3
 
   %t0 = add i32 %val0, %val1
   %t1 = add i32 %t0, %val2
   %t2 = add i32 %t1, %val3
 
-  store i32 %t2, i32* %a
+  store i32 %t2, ptr %a
 
   ret void
 }
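
Here the cleanup goes one step further: a zero-offset GEP of an opaque pointer is the
pointer itself, so %ptr0 disappears and the first load goes straight through %a:

  %ptr0 = getelementptr i32, i32* %a, i32 0   ; typed form
  %val0 = load i32, i32* %ptr0

  %val0 = load i32, ptr %a                    ; opaque form

sched2.ll below gets the identical treatment for its <2 x i32> loads.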

diff --git a/llvm/test/CodeGen/NVPTX/sched2.ll b/llvm/test/CodeGen/NVPTX/sched2.ll
index d43d76b48391f..11e577755e296 100644
--- a/llvm/test/CodeGen/NVPTX/sched2.ll
+++ b/llvm/test/CodeGen/NVPTX/sched2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 
-define void @foo(<2 x i32>* %a) {
+define void @foo(ptr %a) {
 ; CHECK: .func foo
 ; CHECK: ld.v2.u32
 ; CHECK-NEXT: ld.v2.u32
@@ -13,20 +13,19 @@ define void @foo(<2 x i32>* %a) {
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
-  %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
-  %val0 = load <2 x i32>, <2 x i32>* %ptr0
-  %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
-  %val1 = load <2 x i32>, <2 x i32>* %ptr1
-  %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
-  %val2 = load <2 x i32>, <2 x i32>* %ptr2
-  %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
-  %val3 = load <2 x i32>, <2 x i32>* %ptr3
+  %val0 = load <2 x i32>, ptr %a
+  %ptr1 = getelementptr <2 x i32>, ptr %a, i32 1
+  %val1 = load <2 x i32>, ptr %ptr1
+  %ptr2 = getelementptr <2 x i32>, ptr %a, i32 2
+  %val2 = load <2 x i32>, ptr %ptr2
+  %ptr3 = getelementptr <2 x i32>, ptr %a, i32 3
+  %val3 = load <2 x i32>, ptr %ptr3
 
   %t0 = add <2 x i32> %val0, %val1
   %t1 = add <2 x i32> %t0, %val2
   %t2 = add <2 x i32> %t1, %val3
 
-  store <2 x i32> %t2, <2 x i32>* %a
+  store <2 x i32> %t2, ptr %a
 
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/sext-in-reg.ll b/llvm/test/CodeGen/NVPTX/sext-in-reg.ll
index 4afa35d5d75bc..49fc9cc56befa 100644
--- a/llvm/test/CodeGen/NVPTX/sext-in-reg.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-in-reg.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
 
-define void @one(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @one(i64 %a, i64 %b, ptr %p1, ptr %p2) {
 ; CHECK: cvt.s64.s8
 ; CHECK: cvt.s64.s8
 entry:
@@ -15,14 +15,14 @@ entry:
   %shr = ashr i64 %a, 16
   %shr9 = ashr i64 %b, 16
   %add = add nsw i64 %conv4, %conv1
-  store i64 %add, i64* %p1, align 8
+  store i64 %add, ptr %p1, align 8
   %add17 = add nsw i64 %shr9, %shr
-  store i64 %add17, i64* %p2, align 8
+  store i64 %add17, ptr %p2, align 8
   ret void
 }
 
 
-define void @two(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @two(i64 %a, i64 %b, ptr %p1, ptr %p2) {
 entry:
 ; CHECK: cvt.s64.s32
 ; CHECK: cvt.s64.s32
@@ -33,14 +33,14 @@ entry:
   %shr = ashr i64 %a, 16
   %shr9 = ashr i64 %b, 16
   %add = add nsw i64 %conv4, %conv1
-  store i64 %add, i64* %p1, align 8
+  store i64 %add, ptr %p1, align 8
   %add17 = add nsw i64 %shr9, %shr
-  store i64 %add17, i64* %p2, align 8
+  store i64 %add17, ptr %p2, align 8
   ret void
 }
 
 
-define void @three(i64 %a, i64 %b, i64* %p1, i64* %p2) {
+define void @three(i64 %a, i64 %b, ptr %p1, ptr %p2) {
 entry:
 ; CHECK: cvt.s64.s16
 ; CHECK: cvt.s64.s16
@@ -51,14 +51,14 @@ entry:
   %shr = ashr i64 %a, 16
   %shr9 = ashr i64 %b, 16
   %add = add nsw i64 %conv4, %conv1
-  store i64 %add, i64* %p1, align 8
+  store i64 %add, ptr %p1, align 8
   %add17 = add nsw i64 %shr9, %shr
-  store i64 %add17, i64* %p2, align 8
+  store i64 %add17, ptr %p2, align 8
   ret void
 }
 
 
-define void @four(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+define void @four(i32 %a, i32 %b, ptr %p1, ptr %p2) {
 entry:
 ; CHECK: cvt.s32.s8
 ; CHECK: cvt.s32.s8
@@ -69,14 +69,14 @@ entry:
   %shr = ashr i32 %a, 16
   %shr9 = ashr i32 %b, 16
   %add = add nsw i32 %conv4, %conv1
-  store i32 %add, i32* %p1, align 4
+  store i32 %add, ptr %p1, align 4
   %add17 = add nsw i32 %shr9, %shr
-  store i32 %add17, i32* %p2, align 4
+  store i32 %add17, ptr %p2, align 4
   ret void
 }
 
 
-define void @five(i32 %a, i32 %b, i32* %p1, i32* %p2) {
+define void @five(i32 %a, i32 %b, ptr %p1, ptr %p2) {
 entry:
 ; CHECK: cvt.s32.s16
 ; CHECK: cvt.s32.s16
@@ -87,14 +87,14 @@ entry:
   %shr = ashr i32 %a, 16
   %shr9 = ashr i32 %b, 16
   %add = add nsw i32 %conv4, %conv1
-  store i32 %add, i32* %p1, align 4
+  store i32 %add, ptr %p1, align 4
   %add17 = add nsw i32 %shr9, %shr
-  store i32 %add17, i32* %p2, align 4
+  store i32 %add17, ptr %p2, align 4
   ret void
 }
 
 
-define void @six(i16 %a, i16 %b, i16* %p1, i16* %p2) {
+define void @six(i16 %a, i16 %b, ptr %p1, ptr %p2) {
 entry:
 ; CHECK: cvt.s16.s8
 ; CHECK: cvt.s16.s8
@@ -105,8 +105,8 @@ entry:
   %shr = ashr i16 %a, 8
   %shr9 = ashr i16 %b, 8
   %add = add nsw i16 %conv4, %conv1
-  store i16 %add, i16* %p1, align 4
+  store i16 %add, ptr %p1, align 4
   %add17 = add nsw i16 %shr9, %shr
-  store i16 %add17, i16* %p2, align 4
+  store i16 %add17, ptr %p2, align 4
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/shfl.ll b/llvm/test/CodeGen/NVPTX/shfl.ll
index 5008d14a82324..3c83c7fbedfeb 100644
--- a/llvm/test/CodeGen/NVPTX/shfl.ll
+++ b/llvm/test/CodeGen/NVPTX/shfl.ll
@@ -62,30 +62,30 @@ define float @shfl_down_float(float %in) {
 
 ; Try the rest of the shfl modes.  Hopefully they're declared in such a way
 ; that if shfl.down works correctly, they also work correctly.
-define void @shfl_rest(i32 %in_i32, float %in_float, i32* %out_i32, float* %out_float) {
+define void @shfl_rest(i32 %in_i32, float %in_float, ptr %out_i32, ptr %out_float) {
   ; CHECK: shfl.up.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 1, 2;
   %up_i32 = call i32 @llvm.nvvm.shfl.up.i32(i32 %in_i32, i32 1, i32 2)
-  store i32 %up_i32, i32* %out_i32
+  store i32 %up_i32, ptr %out_i32
 
   ; CHECK: shfl.up.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 3, 4;
   %up_float = call float @llvm.nvvm.shfl.up.f32(float %in_float, i32 3, i32 4)
-  store float %up_float, float* %out_float
+  store float %up_float, ptr %out_float
 
   ; CHECK: shfl.bfly.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 6;
   %bfly_i32 = call i32 @llvm.nvvm.shfl.bfly.i32(i32 %in_i32, i32 5, i32 6)
-  store i32 %bfly_i32, i32* %out_i32
+  store i32 %bfly_i32, ptr %out_i32
 
   ; CHECK: shfl.bfly.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 7, 8;
   %bfly_float = call float @llvm.nvvm.shfl.bfly.f32(float %in_float, i32 7, i32 8)
-  store float %bfly_float, float* %out_float
+  store float %bfly_float, ptr %out_float
 
   ; CHECK: shfl.idx.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 9, 10;
   %idx_i32 = call i32 @llvm.nvvm.shfl.idx.i32(i32 %in_i32, i32 9, i32 10)
-  store i32 %idx_i32, i32* %out_i32
+  store i32 %idx_i32, ptr %out_i32
 
   ; CHECK: shfl.idx.b32 %f{{[0-9]+}}, %f{{[0-9]+}}, 11, 12;
   %idx_float = call float @llvm.nvvm.shfl.idx.f32(float %in_float, i32 11, i32 12)
-  store float %idx_float, float* %out_float
+  store float %idx_float, ptr %out_float
 
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/shift-parts.ll b/llvm/test/CodeGen/NVPTX/shift-parts.ll
index 794890175dc65..0f4018db332f5 100644
--- a/llvm/test/CodeGen/NVPTX/shift-parts.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-parts.ll
@@ -2,7 +2,7 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK: shift_parts_left_128
-define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
+define void @shift_parts_left_128(ptr %val, ptr %amtptr) {
 ; CHECK: shl.b64
 ; CHECK: mov.u32
 ; CHECK: sub.s32
@@ -13,15 +13,15 @@ define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
 ; CHECK: setp.gt.s32
 ; CHECK: selp.b64
 ; CHECK: shl.b64
-  %amt = load i128, i128* %amtptr
-  %a = load i128, i128* %val
+  %amt = load i128, ptr %amtptr
+  %a = load i128, ptr %val
   %val0 = shl i128 %a, %amt
-  store i128 %val0, i128* %val
+  store i128 %val0, ptr %val
   ret void
 }
 
 ; CHECK: shift_parts_right_128
-define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
+define void @shift_parts_right_128(ptr %val, ptr %amtptr) {
 ; CHECK: shr.u64
 ; CHECK: sub.s32
 ; CHECK: shl.b64
@@ -31,9 +31,9 @@ define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
 ; CHECK: setp.gt.s32
 ; CHECK: selp.b64
 ; CHECK: shr.s64
-  %amt = load i128, i128* %amtptr
-  %a = load i128, i128* %val
+  %amt = load i128, ptr %amtptr
+  %a = load i128, ptr %val
   %val0 = ashr i128 %a, %amt
-  store i128 %val0, i128* %val
+  store i128 %val0, ptr %val
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll
index f3be040b81d46..682f0630a561c 100644
--- a/llvm/test/CodeGen/NVPTX/simple-call.ll
+++ b/llvm/test/CodeGen/NVPTX/simple-call.ll
@@ -10,12 +10,12 @@ define float @device_func(float %a) noinline {
 }
 
 ; CHECK: .entry kernel_func
-define void @kernel_func(float* %a) {
-  %val = load float, float* %a
+define void @kernel_func(ptr %a) {
+  %val = load float, ptr %a
 ; CHECK: call.uni (retval0),
 ; CHECK: device_func,
   %mul = call float @device_func(float %val)
-  store float %mul, float* %a
+  store float %mul, ptr %a
   ret void
 }
 
@@ -23,4 +23,4 @@ define void @kernel_func(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = !{void (float*)* @kernel_func, !"kernel", i32 1}
+!1 = !{ptr @kernel_func, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/st-generic.ll b/llvm/test/CodeGen/NVPTX/st-generic.ll
index 5e0e337df2367..be423f6c2b98f 100644
--- a/llvm/test/CodeGen/NVPTX/st-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/st-generic.ll
@@ -5,66 +5,66 @@
 
 ;; i8
 
-define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
+define void @st_global_i8(ptr addrspace(0) %ptr, i8 %a) {
 ; PTX32: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
-  store i8 %a, i8 addrspace(0)* %ptr
+  store i8 %a, ptr addrspace(0) %ptr
   ret void
 }
 
 ;; i16
 
-define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
+define void @st_global_i16(ptr addrspace(0) %ptr, i16 %a) {
 ; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
 ; PTX64: ret
-  store i16 %a, i16 addrspace(0)* %ptr
+  store i16 %a, ptr addrspace(0) %ptr
   ret void
 }
 
 ;; i32
 
-define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
+define void @st_global_i32(ptr addrspace(0) %ptr, i32 %a) {
 ; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
 ; PTX64: ret
-  store i32 %a, i32 addrspace(0)* %ptr
+  store i32 %a, ptr addrspace(0) %ptr
   ret void
 }
 
 ;; i64
 
-define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
+define void @st_global_i64(ptr addrspace(0) %ptr, i64 %a) {
 ; PTX32: st.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
 ; PTX64: ret
-  store i64 %a, i64 addrspace(0)* %ptr
+  store i64 %a, ptr addrspace(0) %ptr
   ret void
 }
 
 ;; f32
 
-define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
+define void @st_global_f32(ptr addrspace(0) %ptr, float %a) {
 ; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
 ; PTX64: ret
-  store float %a, float addrspace(0)* %ptr
+  store float %a, ptr addrspace(0) %ptr
   ret void
 }
 
 ;; f64
 
-define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
+define void @st_global_f64(ptr addrspace(0) %ptr, double %a) {
 ; PTX32: st.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
 ; PTX32: ret
 ; PTX64: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
 ; PTX64: ret
-  store double %a, double addrspace(0)* %ptr
+  store double %a, ptr addrspace(0) %ptr
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/store-retval.ll b/llvm/test/CodeGen/NVPTX/store-retval.ll
index 8f6749e980efe..6e652d9b71911 100644
--- a/llvm/test/CodeGen/NVPTX/store-retval.ll
+++ b/llvm/test/CodeGen/NVPTX/store-retval.ll
@@ -21,7 +21,7 @@
 
 %struct.StNoalign = type { [5 x i32] }
 
-define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
+define %struct.StNoalign @func_StNoalign(ptr nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) {
   ; CHECK-LABEL: .func{{.*}}func_StNoalign
   ; CHECK:       ld.param.u32    [[R1:%r[0-9]+]],   [func_StNoalign_param_0];
   ; CHECK-NOT:   st.param.b32    [func_retval0+0],  %r{{[0-9]+}};
@@ -30,15 +30,14 @@ define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef re
   ; CHECK-NOT:   st.param.b32    [func_retval0+12], %r{{[0-9]+}};
   ; CHECK:       st.param.b32    [func_retval0+16], [[R1]];
   ; CHECK-NEXT:  ret;
-  %arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 0
-  %1 = load i32, i32* %arrayidx, align 4
+  %1 = load i32, ptr %in, align 4
   %.fca.0.4.insert = insertvalue %struct.StNoalign { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison] }, i32 %1, 0, 4
   ret %struct.StNoalign %.fca.0.4.insert
 }
 
 %struct.StAlign8 = type { [5 x i32], [4 x i8] }
 
-define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
+define %struct.StAlign8 @func_StAlign8(ptr nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) {
   ; CHECK-LABEL: .func{{.*}}func_StAlign8
   ; CHECK:       ld.param.u32    [[R1:%r[0-9]+]],   [func_StAlign8_param_0];
   ; CHECK-NOT:   st.param.b32    [func_retval0+0],  %r{{[0-9]+}};
@@ -48,15 +47,14 @@ define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef reado
   ; CHECK:       st.param.b32    [func_retval0+16], [[R1]];
   ; CHECK-NOT:   st.param.v4.b8  [func_retval0+20], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
   ; CHECK-NEXT:  ret;
-  %arrayidx = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 0
-  %1 = load i32, i32* %arrayidx, align 8
+  %1 = load i32, ptr %in, align 8
   %.fca.0.4.insert = insertvalue %struct.StAlign8 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [4 x i8] poison }, i32 %1, 0, 4
   ret %struct.StAlign8 %.fca.0.4.insert
 }
 
 %struct.StAlign16 = type { [5 x i32], [12 x i8] }
 
-define %struct.StAlign16 @func_StAlign16(%struct.StAlign16* nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
+define %struct.StAlign16 @func_StAlign16(ptr nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) {
   ; CHECK-LABEL: .func{{.*}}func_StAlign16
   ; CHECK:       ld.param.u32    [[R1:%r[0-9]+]],   [func_StAlign16_param_0];
   ; CHECK-NOT:   st.param.b32    [func_retval0+0],  %r{{[0-9]+}};
@@ -68,8 +66,7 @@ define %struct.StAlign16 @func_StAlign16(%struct.StAlign16* nocapture noundef re
   ; CHECK-NOT:   st.param.v4.b8  [func_retval0+24], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
   ; CHECK-NOT:   st.param.v4.b8  [func_retval0+28], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}};
   ; CHECK-NEXT:  ret;
-  %arrayidx = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 0
-  %1 = load i32, i32* %arrayidx, align 16
+  %1 = load i32, ptr %in, align 16
   %.fca.0.4.insert = insertvalue %struct.StAlign16 { [5 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 poison], [12 x i8] poison }, i32 %1, 0, 4
   ret %struct.StAlign16 %.fca.0.4.insert
 }
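
Same idea for aggregates: a GEP addressing element (0,0,0) of the byval struct
resolves to the struct's own address, so the explicit %arrayidx is dropped and the
i32 is loaded through %in directly:

  %arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 0
  %1 = load i32, i32* %arrayidx, align 4

  %1 = load i32, ptr %in, align 4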

diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index d35c8e5316d7c..cb4ff9ec15544 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -6,12 +6,12 @@
 target triple = "nvptx-unknown-cuda"
 
 declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 ; SM20-LABEL: .entry foo
 ; SM30-LABEL: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
 ; SM20: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
 ; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
 ; SM30: ld.param.u64    %rd[[SURFREG:[0-9]+]], [foo_param_0];
@@ -22,7 +22,7 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
   %ret = sitofp i32 %val to float
 ; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
 ; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
@@ -30,9 +30,9 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
 
 ; SM20-LABEL: .entry bar
 ; SM30-LABEL: .entry bar
-define void @bar(float* %red, i32 %idx) {
+define void @bar(ptr %red, i32 %idx) {
 ; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
-  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
 ; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
 ; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
@@ -41,7 +41,7 @@ define void @bar(float* %red, i32 %idx) {
   %ret = sitofp i32 %val to float
 ; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
 ; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
@@ -49,7 +49,7 @@ define void @bar(float* %red, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
 

diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll
index 784de11864276..dcf8b4ec739ed 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read.ll
@@ -6,16 +6,16 @@ target triple = "nvptx-unknown-nvcl"
 declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
 
 ; CHECK: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
 ; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
   %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
 ; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
   %ret = sitofp i32 %val to float
 ; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, float*, i32)* @foo, !"rdwrimage", i32 0}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdwrimage", i32 0}

diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index bfe8c42e4a76a..d3f060561df7d 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -6,7 +6,7 @@
 target triple = "nvptx-unknown-cuda"
 
 declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 ; SM20-LABEL: .entry foo
@@ -29,7 +29,7 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 ; SM30-LABEL: .entry bar
 define void @bar(i32 %val, i32 %idx) {
 ; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
-  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
 ; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
 ; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
   tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
@@ -38,7 +38,7 @@ define void @bar(i32 %val, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i32, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1}
 

diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll
index 77fc0e90609cb..0e2503aafde3c 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write.ll
@@ -13,5 +13,5 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i32, i32)* @foo, !"wroimage", i32 0}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"wroimage", i32 0}

diff --git a/llvm/test/CodeGen/NVPTX/symbol-naming.ll b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
index d78f47a340795..143a7c6de32e3 100644
--- a/llvm/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/llvm/test/CodeGen/NVPTX/symbol-naming.ll
@@ -26,14 +26,14 @@ target triple = "nvptx64-unknown-unknown"
 ; Function Attrs: nounwind
 define internal void @.function.() {
 entry:
-  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
+  %call = call i32 (ptr, ...) @printf(ptr @.str)
   ret void
 }
 
 ; Function Attrs: nounwind
 define internal void @_$_function_$_() {
 entry:
-  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @_$_str, i32 0, i32 0))
+  %call = call i32 (ptr, ...) @printf(ptr @_$_str)
   ret void
 }
 
@@ -45,4 +45,4 @@ entry:
   ret void
 }
 
-declare i32 @printf(i8*, ...)
+declare i32 @printf(ptr, ...)

diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index 12cf06fd4d6d7..8cc69e9130cd5 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -7,11 +7,11 @@
 target triple = "nvptx-unknown-cuda"
 
 declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 ; SM20-LABEL: .entry foo
 ; SM30-LABEL: .entry foo
-define void @foo(i64 %img, float* %red, i32 %idx) {
+define void @foo(i64 %img, ptr %red, i32 %idx) {
 ; SM20: ld.param.u64    %rd[[TEXREG:[0-9]+]], [foo_param_0];
 ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
 ; SM30: ld.param.u64    %rd[[TEXREG:[0-9]+]], [foo_param_0];
@@ -20,7 +20,7 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
   %ret = extractvalue { float, float, float, float } %val, 0
 ; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
 ; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
@@ -29,16 +29,16 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
 
 ; SM20-LABEL: .entry bar
 ; SM30-LABEL: .entry bar
-define void @bar(float* %red, i32 %idx) {
+define void @bar(ptr %red, i32 %idx) {
 ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0 
-  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
 ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
 ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
 ; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
 ; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
@@ -46,9 +46,9 @@ declare float @texfunc(i64)
 
 ; SM20-LABEL: .entry baz
 ; SM30-LABEL: .entry baz
-define void @baz(float* %red, i32 %idx) {
+define void @baz(ptr %red, i32 %idx) {
 ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
-  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
 ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
 ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
@@ -65,12 +65,12 @@ define void @baz(float* %red, i32 %idx) {
   %ret2 = fadd float %ret, %texcall
 ; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]]
 ; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RET2]]
-  store float %ret2, float* %red
+  store float %ret2, ptr %red
   ret void
 }
 
 !nvvm.annotations = !{!1, !2, !3, !4}
-!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
-!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
-!4 = !{void (float*, i32)* @baz, !"kernel", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @bar, !"kernel", i32 1}
+!3 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
+!4 = !{ptr @baz, !"kernel", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll
index dc750e69615aa..d2edb30a28a70 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read.ll
@@ -6,16 +6,16 @@ target triple = "nvptx-unknown-nvcl"
 declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)
 
 ; CHECK: .entry foo
-define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
+define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) {
 ; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
   %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
   %ret = extractvalue { float, float, float, float } %val, 0
 ; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
-  store float %ret, float* %red
+  store float %ret, ptr %red
   ret void
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = !{void (i64, i64, float*, i32)* @foo, !"kernel", i32 1}
-!2 = !{void (i64, i64, float*, i32)* @foo, !"rdoimage", i32 0}
-!3 = !{void (i64, i64, float*, i32)* @foo, !"sampler", i32 1}
+!1 = !{ptr @foo, !"kernel", i32 1}
+!2 = !{ptr @foo, !"rdoimage", i32 0}
+!3 = !{ptr @foo, !"sampler", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
index b685bcab6fd13..6a4f607fbe038 100644
--- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -12,7 +12,7 @@ declare i32 @llvm.nvvm.txq.width(i64)
 declare i32 @llvm.nvvm.txq.height(i64)
 declare i32 @llvm.nvvm.suq.width(i64)
 declare i32 @llvm.nvvm.suq.height(i64)
-declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
+declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1))
 
 
 ; SM20-LABEL: @t0
@@ -28,7 +28,7 @@ define i32 @t0(i64 %texHandle) {
 ; SM30-LABEL: @t1
 define i32 @t1() {
 ; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
-  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
 ; SM20: txq.width.b32 %r{{[0-9]+}}, [tex0]
 ; SM30: txq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %width = tail call i32 @llvm.nvvm.txq.width(i64 %texHandle)
@@ -49,7 +49,7 @@ define i32 @t2(i64 %texHandle) {
 ; SM30-LABEL: @t3
 define i32 @t3() {
 ; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], tex0
-  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
+  %texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex0)
 ; SM20: txq.height.b32 %r{{[0-9]+}}, [tex0]
 ; SM30: txq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %height = tail call i32 @llvm.nvvm.txq.height(i64 %texHandle)
@@ -70,7 +70,7 @@ define i32 @s0(i64 %surfHandle) {
 ; SM30-LABEL: @s1
 define i32 @s1() {
 ; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
-  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
 ; SM20: suq.width.b32 %r{{[0-9]+}}, [surf0]
 ; SM30: suq.width.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %width = tail call i32 @llvm.nvvm.suq.width(i64 %surfHandle)
@@ -91,7 +91,7 @@ define i32 @s2(i64 %surfHandle) {
 ; SM30-LABEL: @s3
 define i32 @s3() {
 ; SM30: mov.u64 %rd[[HANDLE:[0-9]+]], surf0
-  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
+  %surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @surf0)
 ; SM20: suq.height.b32 %r{{[0-9]+}}, [surf0]
 ; SM30: suq.height.b32 %r{{[0-9]+}}, [%rd[[HANDLE:[0-9]+]]]
   %height = tail call i32 @llvm.nvvm.suq.height(i64 %surfHandle)
@@ -101,5 +101,5 @@ define i32 @s3() {
 
 
 !nvvm.annotations = !{!1, !2}
-!1 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
-!2 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
+!1 = !{ptr addrspace(1) @tex0, !"texture", i32 1}
+!2 = !{ptr addrspace(1) @surf0, !"surface", i32 1}

diff --git a/llvm/test/CodeGen/NVPTX/tuple-literal.ll b/llvm/test/CodeGen/NVPTX/tuple-literal.ll
index b0eed3ad8ba4b..157780a8ccfdb 100644
--- a/llvm/test/CodeGen/NVPTX/tuple-literal.ll
+++ b/llvm/test/CodeGen/NVPTX/tuple-literal.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 %if ptxas %{ | %ptxas-verify %}
 
-define ptx_device void @test_function({i8, i8}*) {
+define ptx_device void @test_function(ptr) {
   ret void
 }

diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index de8f7074b70be..ab79f45906067 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -5,29 +5,27 @@
 
 ; CHECK: .address_size [[BITS:32|64]]
 
-%struct.__va_list_tag = type { i8*, i8*, i32, i32 }
+%struct.__va_list_tag = type { ptr, ptr, i32, i32 }
 
- at foo_ptr = internal addrspace(1) global i32 (i32, ...)* @foo, align 8
+ at foo_ptr = internal addrspace(1) global ptr @foo, align 8
 
 define i32 @foo(i32 %a, ...) {
 entry:
   %al = alloca [1 x %struct.__va_list_tag], align 8
-  %ap = bitcast [1 x %struct.__va_list_tag]* %al to i8*
   %al2 = alloca [1 x %struct.__va_list_tag], align 8
-  %ap2 = bitcast [1 x %struct.__va_list_tag]* %al2 to i8*
 
 ; Test va_start
 ; CHECK:         .param .align 8 .b8 foo_vararg[]
 ; CHECK:         mov.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], foo_vararg;
 ; CHECK-NEXT:    st.u[[BITS]] [%SP+0], [[VA_PTR]];
 
-  call void @llvm.va_start(i8* %ap)
+  call void @llvm.va_start(ptr %al)
 
 ; Test va_copy()
 ; CHECK-NEXT:	 ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
 ; CHECK-NEXT:	 st.u[[BITS]] [%SP+{{[0-9]+}}], [[VA_PTR]];
 
-  call void @llvm.va_copy(i8* %ap2, i8* %ap)
+  call void @llvm.va_copy(ptr %al2, ptr %al)
 
 ; Test va_arg(ap, int32_t)
 ; CHECK-NEXT:    ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
@@ -37,7 +35,7 @@ entry:
 ; CHECK-NEXT:    st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
 ; CHECK-NEXT:    ld.local.u32 %r{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
 
-  %0 = va_arg i8* %ap, i32
+  %0 = va_arg ptr %al, i32
 
 ; Test va_arg(ap, int64_t)
 ; CHECK-NEXT:    ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
@@ -47,7 +45,7 @@ entry:
 ; CHECK-NEXT:    st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
 ; CHECK-NEXT:    ld.local.u64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
 
-  %1 = va_arg i8* %ap, i64
+  %1 = va_arg ptr %al, i64
 
 ; Test va_arg(ap, double)
 ; CHECK-NEXT:    ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
@@ -57,9 +55,9 @@ entry:
 ; CHECK-NEXT:    st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
 ; CHECK-NEXT:    ld.local.f64 %fd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
 
-  %2 = va_arg i8* %ap, double
+  %2 = va_arg ptr %al, double
 
-; Test va_arg(ap, void *)
+; Test va_arg(ap, ptr)
 ; CHECK-NEXT:    ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP+0];
 ; CHECK32-NEXT:  add.s32 [[VA_PTR_TMP:%r[0-9]+]], [[VA_PTR]], 3;
 ; CHECK64-NEXT:  add.s64 [[VA_PTR_TMP:%rd[0-9]+]], [[VA_PTR]], 7;
@@ -70,17 +68,17 @@ entry:
 ; CHECK-NEXT:    st.u[[BITS]] [%SP+0], [[VA_PTR_NEXT]];
 ; CHECK-NEXT:    ld.local.u[[BITS]] %{{(r|rd)[0-9]+}}, [[[VA_PTR_ALIGN]]];
 
-  %3 = va_arg i8* %ap, i8*
-  %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, i8* %3)
+  %3 = va_arg ptr %al, ptr
+  %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, ptr %3)
 
-  call void @llvm.va_end(i8* %ap)
-  %4 =  va_arg i8* %ap2, i32
-  call void @llvm.va_end(i8* %ap2)
+  call void @llvm.va_end(ptr %al)
+  %4 =  va_arg ptr %al2, i32
+  call void @llvm.va_end(ptr %al2)
   %5 = add i32 %call, %4
   ret i32 %5
 }
 
-define i32 @test_foo(i32 %i, i64 %l, double %d, i8* %p) {
+define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
 ; Test indirect variadic function call.
 
 ; Load arguments to temporary variables
@@ -101,12 +99,12 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, i8* %p) {
 ; CHECK-NEXT:    prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
 
 entry:
-  %ptr = load i32 (i32, ...)*, i32 (i32, ...)** addrspacecast (i32 (i32, ...)* addrspace(1)* @foo_ptr to i32 (i32, ...)**), align 8
-  %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, i8* %p)
+  %ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
+  %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, ptr %p)
   ret i32 %call
 }
 
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_end(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare i32 @bar(i32, i32, i64, double, i8*)
+declare void @llvm.va_start(ptr)
+declare void @llvm.va_end(ptr)
+declare void @llvm.va_copy(ptr, ptr)
+declare i32 @bar(i32, i32, i64, double, ptr)

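The mechanical effect of the conversion is easiest to see in this file: the i8* bitcasts of the va_list allocas vanish, and the vararg intrinsics take the alloca's ptr directly. A minimal before/after sketch, reusing the names from the test above:

  ; typed pointers
  %ap = bitcast [1 x %struct.__va_list_tag]* %al to i8*
  call void @llvm.va_start(i8* %ap)

  ; opaque pointers -- the bitcast folds away
  call void @llvm.va_start(ptr %al)

The same collapse happens to the constant expression in @test_foo: the nested function-pointer casts reduce to a single addrspacecast (ptr addrspace(1) @foo_ptr to ptr).
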
diff --git a/llvm/test/CodeGen/NVPTX/vec8.ll b/llvm/test/CodeGen/NVPTX/vec8.ll
index f8dc9f3b693ef..fd383ce709bb7 100644
--- a/llvm/test/CodeGen/NVPTX/vec8.ll
+++ b/llvm/test/CodeGen/NVPTX/vec8.ll
@@ -4,7 +4,7 @@
 target triple = "nvptx-unknown-cuda"
 
 ; CHECK: .visible .func foo
-define void @foo(<8 x i8> %a, i8* %b) {
+define void @foo(<8 x i8> %a, ptr %b) {
 ; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [foo_param_0]
 ; CHECK-DAG: ld.param.v4.u8 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [foo_param_0+4]
 ; CHECK-DAG: ld.param.u32   %[[B:r[0-9]+]], [foo_param_1]
@@ -13,7 +13,7 @@ define void @foo(<8 x i8> %a, i8* %b) {
   %t0 = extractelement <8 x i8> %a, i32 1
   %t1 = extractelement <8 x i8> %a, i32 6
   %t  = add i8 %t0, %t1
-  store i8 %t, i8* %b
+  store i8 %t, ptr %b
   ret void
 }
 

diff --git a/llvm/test/CodeGen/NVPTX/vector-compare.ll b/llvm/test/CodeGen/NVPTX/vector-compare.ll
index 4908394a1e3fc..8232258d12057 100644
--- a/llvm/test/CodeGen/NVPTX/vector-compare.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-compare.ll
@@ -5,15 +5,15 @@
 ; scalarized.  If codegen fails, then the type legalizer incorrectly
 ; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
 
-define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
-  %aval = load <2 x i32>, <2 x i32>* %a
-  %bval = load <2 x i32>, <2 x i32>* %b
+define void @foo(ptr %a, ptr %b, ptr %r1, ptr %r2) {
+  %aval = load <2 x i32>, ptr %a
+  %bval = load <2 x i32>, ptr %b
   %res = icmp slt <2 x i32> %aval, %bval
   %t1 = extractelement <2 x i1> %res, i32 0
   %t2 = extractelement <2 x i1> %res, i32 1
   %t1a = zext i1 %t1 to i32
   %t2a = zext i1 %t2 to i32
-  store i32 %t1a, i32* %r1
-  store i32 %t2a, i32* %r2
+  store i32 %t1a, ptr %r1
+  store i32 %t2a, ptr %r2
   ret void
 }

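For reference, the per-lane form the type legalizer is expected to produce for the <2 x i32> compare looks roughly like the following (an illustrative IR-level sketch, not literal legalizer output; %a0/%b0/%c0 are made-up names):

  %a0 = extractelement <2 x i32> %aval, i32 0
  %b0 = extractelement <2 x i32> %bval, i32 0
  %c0 = icmp slt i32 %a0, %b0    ; each i1 lane handled as a scalar
  ; ...and likewise for lane 1

Mis-legalizing this by promoting <2 x i1> to <2 x i8>, as the comment in the test warns, is what broke instruction selection.
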
diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index 82238e98e8214..64662dd020c57 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -9,60 +9,60 @@
 ; which will load two floats at once into scalar registers.
 
 ; CHECK-LABEL: foo
-define void @foo(<2 x float>* %a) {
+define void @foo(ptr %a) {
 ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <2 x float>, <2 x float>* %a
+  %t1 = load <2 x float>, ptr %a
   %t2 = fmul <2 x float> %t1, %t1
-  store <2 x float> %t2, <2 x float>* %a
+  store <2 x float> %t2, ptr %a
   ret void
 }
 
 ; CHECK-LABEL: foo2
-define void @foo2(<4 x float>* %a) {
+define void @foo2(ptr %a) {
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <4 x float>, <4 x float>* %a
+  %t1 = load <4 x float>, ptr %a
   %t2 = fmul <4 x float> %t1, %t1
-  store <4 x float> %t2, <4 x float>* %a
+  store <4 x float> %t2, ptr %a
   ret void
 }
 
 ; CHECK-LABEL: foo3
-define void @foo3(<8 x float>* %a) {
+define void @foo3(ptr %a) {
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <8 x float>, <8 x float>* %a
+  %t1 = load <8 x float>, ptr %a
   %t2 = fmul <8 x float> %t1, %t1
-  store <8 x float> %t2, <8 x float>* %a
+  store <8 x float> %t2, ptr %a
   ret void
 }
 
 
 
 ; CHECK-LABEL: foo4
-define void @foo4(<2 x i32>* %a) {
+define void @foo4(ptr %a) {
 ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <2 x i32>, <2 x i32>* %a
+  %t1 = load <2 x i32>, ptr %a
   %t2 = mul <2 x i32> %t1, %t1
-  store <2 x i32> %t2, <2 x i32>* %a
+  store <2 x i32> %t2, ptr %a
   ret void
 }
 
 ; CHECK-LABEL: foo5
-define void @foo5(<4 x i32>* %a) {
+define void @foo5(ptr %a) {
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <4 x i32>, <4 x i32>* %a
+  %t1 = load <4 x i32>, ptr %a
   %t2 = mul <4 x i32> %t1, %t1
-  store <4 x i32> %t2, <4 x i32>* %a
+  store <4 x i32> %t2, ptr %a
   ret void
 }
 
 ; CHECK-LABEL: foo6
-define void @foo6(<8 x i32>* %a) {
+define void @foo6(ptr %a) {
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <8 x i32>, <8 x i32>* %a
+  %t1 = load <8 x i32>, ptr %a
   %t2 = mul <8 x i32> %t1, %t1
-  store <8 x i32> %t2, <8 x i32>* %a
+  store <8 x i32> %t2, ptr %a
   ret void
 }
 
@@ -71,8 +71,7 @@ define void @foo6(<8 x i32>* %a) {
 declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
 ; CHECK-LABEL: foo_complex
-define void @foo_complex(i8* nocapture readonly align 16 dereferenceable(134217728) %alloc0) {
-  %targ0.1.typed = bitcast i8* %alloc0 to [1024 x [131072 x i8]]*
+define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(134217728) %alloc0) {
   %t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
   %t1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
   %t2 = lshr i32 %t1, 8
@@ -86,14 +85,14 @@ define void @foo_complex(i8* nocapture readonly align 16 dereferenceable(1342177
   %t10 = or i32 %t4, 129
   %t11 = zext i32 %t10 to i64
   %t20 = zext i32 %t2 to i64
-  %t27 = getelementptr inbounds [1024 x [131072 x i8]], [1024 x [131072 x i8]]* %targ0.1.typed, i64 0, i64 %t20, i64 %t9
+  %t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9
 ; CHECK: ld.v2.u8
-  %t28 = load i8, i8* %t27, align 2
-  %t31 = getelementptr inbounds [1024 x [131072 x i8]], [1024 x [131072 x i8]]* %targ0.1.typed, i64 0, i64 %t20, i64 %t11
-  %t32 = load i8, i8* %t31, align 1
+  %t28 = load i8, ptr %t27, align 2
+  %t31 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t11
+  %t32 = load i8, ptr %t31, align 1
   %t33 = icmp ult i8 %t28, %t32
   %t34 = select i1 %t33, i8 %t32, i8 %t28
-  store i8 %t34, i8* %t31
+  store i8 %t34, ptr %t31
 ; CHECK: ret
   ret void
 }

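One detail worth noting in foo_complex: the bitcast to [1024 x [131072 x i8]]* is gone, but the getelementptr keeps the array type, because with opaque pointers the source element type is an explicit operand of the gep rather than a property of the pointer operand:

  ; the element type now lives only on the gep itself
  %t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9

The address arithmetic, and hence the ld.v2.u8 the test checks for, is unchanged.
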
diff --git a/llvm/test/CodeGen/NVPTX/vector-select.ll b/llvm/test/CodeGen/NVPTX/vector-select.ll
index 0d4d45611ef41..aab3da0b23e8a 100644
--- a/llvm/test/CodeGen/NVPTX/vector-select.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-select.ll
@@ -4,13 +4,13 @@
 ; This test makes sure that vector selects are scalarized by the type legalizer.
 ; If not, type legalization will fail.
 
-define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
+define void @foo(ptr addrspace(1) %def_a, ptr addrspace(1) %def_b, ptr addrspace(1) %def_c) {
 entry:
-  %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
-  %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
-  %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
+  %tmp4 = load <2 x i32>, ptr addrspace(1) %def_a
+  %tmp6 = load <2 x i32>, ptr addrspace(1) %def_c
+  %tmp8 = load <2 x i32>, ptr addrspace(1) %def_b
   %0 = icmp sge <2 x i32> %tmp4, zeroinitializer
   %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
-  store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
+  store <2 x i32> %cond, ptr addrspace(1) %def_c
   ret void
 }

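Note that the address space survives the conversion: <2 x i32> addrspace(1)* becomes ptr addrspace(1), so lowering to global-space accesses is unaffected. For example (the ld.global form is what NVPTX emits for addrspace(1), as the vectorize-misaligned checks below also show):

  %tmp4 = load <2 x i32>, ptr addrspace(1) %def_a   ; still a global-space load (ld.global.*)
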
diff --git a/llvm/test/CodeGen/NVPTX/vector-stores.ll b/llvm/test/CodeGen/NVPTX/vector-stores.ll
index 5624d7e9565e6..e6d545323dbe9 100644
--- a/llvm/test/CodeGen/NVPTX/vector-stores.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-stores.ll
@@ -3,29 +3,29 @@
 
 ; CHECK: .visible .func foo1
 ; CHECK: st.v2.f32
-define void @foo1(<2 x float> %val, <2 x float>* %ptr) {
-  store <2 x float> %val, <2 x float>* %ptr
+define void @foo1(<2 x float> %val, ptr %ptr) {
+  store <2 x float> %val, ptr %ptr
   ret void
 }
 
 ; CHECK: .visible .func foo2
 ; CHECK: st.v4.f32
-define void @foo2(<4 x float> %val, <4 x float>* %ptr) {
-  store <4 x float> %val, <4 x float>* %ptr
+define void @foo2(<4 x float> %val, ptr %ptr) {
+  store <4 x float> %val, ptr %ptr
   ret void
 }
 
 ; CHECK: .visible .func foo3
 ; CHECK: st.v2.u32
-define void @foo3(<2 x i32> %val, <2 x i32>* %ptr) {
-  store <2 x i32> %val, <2 x i32>* %ptr
+define void @foo3(<2 x i32> %val, ptr %ptr) {
+  store <2 x i32> %val, ptr %ptr
   ret void
 }
 
 ; CHECK: .visible .func foo4
 ; CHECK: st.v4.u32
-define void @foo4(<4 x i32> %val, <4 x i32>* %ptr) {
-  store <4 x i32> %val, <4 x i32>* %ptr
+define void @foo4(<4 x i32> %val, ptr %ptr) {
+  store <4 x i32> %val, ptr %ptr
   ret void
 }
 

diff --git a/llvm/test/CodeGen/NVPTX/vectorize-misaligned.ll b/llvm/test/CodeGen/NVPTX/vectorize-misaligned.ll
index 44f6bda0eb178..ec9c38258c57a 100644
--- a/llvm/test/CodeGen/NVPTX/vectorize-misaligned.ll
+++ b/llvm/test/CodeGen/NVPTX/vectorize-misaligned.ll
@@ -8,24 +8,24 @@ target triple = "nvptx64-nvidia-cuda"
 ; CHECK: ld.global.v2.f32
 ; CHECK: st.global.v2.f32
 ; CHECK: st.global.v2.f32
-define void @test1(float addrspace(1)* noalias align 8 %in, float addrspace(1)* noalias align 8 %out) {
-  %in.1 = getelementptr float, float addrspace(1)* %in, i32 1
-  %in.2 = getelementptr float, float addrspace(1)* %in, i32 2
-  %in.3 = getelementptr float, float addrspace(1)* %in, i32 3
-  %v0 = load float, float addrspace(1)* %in, align 8
-  %v1 = load float, float addrspace(1)* %in.1, align 4
-  %v2 = load float, float addrspace(1)* %in.2, align 8
-  %v3 = load float, float addrspace(1)* %in.3, align 4
+define void @test1(ptr addrspace(1) noalias align 8 %in, ptr addrspace(1) noalias align 8 %out) {
+  %in.1 = getelementptr float, ptr addrspace(1) %in, i32 1
+  %in.2 = getelementptr float, ptr addrspace(1) %in, i32 2
+  %in.3 = getelementptr float, ptr addrspace(1) %in, i32 3
+  %v0 = load float, ptr addrspace(1) %in, align 8
+  %v1 = load float, ptr addrspace(1) %in.1, align 4
+  %v2 = load float, ptr addrspace(1) %in.2, align 8
+  %v3 = load float, ptr addrspace(1) %in.3, align 4
   %sum0 = fadd float %v0, %v1
   %sum1 = fadd float %v1, %v2
   %sum2 = fadd float %v3, %v1
   %sum3 = fadd float %v2, %v3
-  %out.1 = getelementptr float, float addrspace(1)* %out, i32 1
-  %out.2 = getelementptr float, float addrspace(1)* %out, i32 2
-  %out.3 = getelementptr float, float addrspace(1)* %out, i32 3
-  store float %sum0, float addrspace(1)* %out, align 8
-  store float %sum1, float addrspace(1)* %out.1, align 4
-  store float %sum2, float addrspace(1)* %out.2, align 8
-  store float %sum3, float addrspace(1)* %out.3, align 4
+  %out.1 = getelementptr float, ptr addrspace(1) %out, i32 1
+  %out.2 = getelementptr float, ptr addrspace(1) %out, i32 2
+  %out.3 = getelementptr float, ptr addrspace(1) %out, i32 3
+  store float %sum0, ptr addrspace(1) %out, align 8
+  store float %sum1, ptr addrspace(1) %out.1, align 4
+  store float %sum2, ptr addrspace(1) %out.2, align 8
+  store float %sum3, ptr addrspace(1) %out.3, align 4
   ret void
 }

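The alignment pattern is the point of this test: %in and %out carry align 8, so the scalar accesses at offsets 0/4 and 8/12 pair up into v2 operations, while the align 4 elements alone could not anchor one. Roughly what the load-store vectorizer makes of the four loads (an illustrative IR sketch under that assumption; %v01/%v23 are made-up names, and the checks above verify the resulting ld.global.v2.f32/st.global.v2.f32):

  %v01 = load <2 x float>, ptr addrspace(1) %in, align 8    ; covers %v0 and %v1
  %in.2 = getelementptr float, ptr addrspace(1) %in, i32 2
  %v23 = load <2 x float>, ptr addrspace(1) %in.2, align 8  ; covers %v2 and %v3
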
diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll
index ad2e12dfaea63..3386731827cd8 100644
--- a/llvm/test/CodeGen/NVPTX/weak-global.ll
+++ b/llvm/test/CodeGen/NVPTX/weak-global.ll
@@ -5,6 +5,6 @@
 @g = common addrspace(1) global i32 zeroinitializer
 
 define i32 @func0() {
-  %val = load i32, i32 addrspace(1)* @g
+  %val = load i32, ptr addrspace(1) @g
   ret i32 %val
 }
