[llvm] ff302f8 - [AArch64] Convert some tests to opaque pointers (NFC)

Nikita Popov via llvm-commits <llvm-commits@lists.llvm.org>
Tue Dec 20 03:02:05 PST 2022


Author: Nikita Popov
Date: 2022-12-20T12:01:57+01:00
New Revision: ff302f850242b7f5e1fc48235471b8273c421236

URL: https://github.com/llvm/llvm-project/commit/ff302f850242b7f5e1fc48235471b8273c421236
DIFF: https://github.com/llvm/llvm-project/commit/ff302f850242b7f5e1fc48235471b8273c421236.diff

LOG: [AArch64] Convert some tests to opaque pointers (NFC)
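
For readers not familiar with the migration: with opaque pointers, every typed
pointer type (i32*, <4 x float>*, i8 addrspace(1)*, ...) is written simply as
ptr, so pointer bitcasts and zero-offset getelementptr constant expressions
around globals fold away. A minimal before/after sketch of the rewrite applied
throughout the diffs below (illustrative only; @g and @load_first are
placeholder names, not taken from the tests in this commit):

    ; Before: typed pointers require a matching pointee type and an explicit
    ; constant GEP to address the first element of the global.
    @g = global [4 x i32] zeroinitializer
    define i32 @load_first() {
      %v = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @g, i64 0, i64 0), align 4
      ret i32 %v
    }

    ; After: the same code with the opaque ptr type; the zero-offset GEP and
    ; any pointer bitcasts are no longer needed.
    @g = global [4 x i32] zeroinitializer
    define i32 @load_first() {
      %v = load i32, ptr @g, align 4
      ret i32 %v
    }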

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir
    llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
    llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
    llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
    llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
    llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll
    llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
    llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
    llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
    llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
index d503074f9f693..b2aadd7ca5c2d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir
@@ -9,9 +9,9 @@
   define dso_local i32 @gv_large() {
   entry:
     %retval = alloca i32, align 4
-    store i32 0, i32* %retval, align 4
-    %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4
-    %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4
+    store i32 0, ptr %retval, align 4
+    %0 = load i32, ptr @foo1, align 4
+    %1 = load i32, ptr @foo2, align 4
     %add = add nsw i32 %0, %1
     ret i32 %add
   }
@@ -41,8 +41,8 @@ body:             |
     ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]]
     ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval)
-    ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
-    ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1)
+    ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2)
     ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]]
     ; CHECK: $w0 = COPY [[ADDWrr]]
     ; CHECK: RET_ReallyLR implicit $w0
@@ -53,8 +53,8 @@ body:             |
     %6:gpr(p0) = COPY %7(p0)
     %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval
     G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval)
-    %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
-    %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from @foo1)
+    %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from @foo2)
     %8:gpr(s32) = G_ADD %2, %5
     $w0 = COPY %8(s32)
     RET_ReallyLR implicit $w0

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir
index 64aee2d0afbf4..e14c43a7923e1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-tiny.mir
@@ -9,9 +9,9 @@
   define dso_local i32 @gv_tiny() {
   entry:
     %retval = alloca i32, align 4
-    store i32 0, i32* %retval, align 4
-    %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4
-    %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4
+    store i32 0, ptr %retval, align 4
+    %0 = load i32, ptr @foo1, align 4
+    %1 = load i32, ptr @foo2, align 4
     %add = add nsw i32 %0, %1
     ret i32 %add
   }
@@ -35,8 +35,8 @@ body:             |
     ; CHECK: [[ADR1:%[0-9]+]]:gpr64 = ADR @foo2
     ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[ADR1]]
     ; CHECK: STRWui $wzr, %stack.0.retval, 0 :: (store (s32) into %ir.retval)
-    ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
-    ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load (s32) from @foo1)
+    ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load (s32) from @foo2)
     ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]]
     ; CHECK: $w0 = COPY [[ADDWrr]]
     ; CHECK: RET_ReallyLR implicit $w0
@@ -47,8 +47,8 @@ body:             |
     %6:gpr(p0) = COPY %7(p0)
     %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval
     G_STORE %1(s32), %0(p0) :: (store (s32) into %ir.retval)
-    %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`)
-    %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`)
+    %2:gpr(s32) = G_LOAD %3(p0) :: (load (s32) from @foo1)
+    %5:gpr(s32) = G_LOAD %6(p0) :: (load (s32) from @foo2)
     %8:gpr(s32) = G_ADD %2, %5
     $w0 = COPY %8(s32)
     RET_ReallyLR implicit $w0

diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 59b0b09ea1acc..c2a3acbedc8ba 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -16,16 +16,16 @@ target triple = "aarch64-linux-gnueabi"
 
 ; Check that when two complex GEPs are used in two basic blocks, LLVM can
 ; eliminate the common subexpression for the second use.
-define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
-  %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
-  %1 = load i32, i32* %liberties, align 4
+define void @test_GEP_CSE(ptr %string, ptr %adj, i32 %lib, i64 %idxprom) {
+  %liberties = getelementptr [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 3
+  %1 = load i32, ptr %liberties, align 4
   %cmp = icmp eq i32 %1, %lib
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
-  %2 = load i32, i32* %origin, align 4
-  store i32 %2, i32* %adj, align 4
+  %origin = getelementptr [240 x %struct], ptr %string, i64 1, i64 %idxprom, i32 2
+  %2 = load i32, ptr %origin, align 4
+  store i32 %2, ptr %adj, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
@@ -39,7 +39,7 @@ if.end:                                           ; preds = %if.then, %entry
 ; CHECK:ldr
 
 ; CHECK-NoAA-LABEL: @test_GEP_CSE(
-; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
+; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
 ; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
 ; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
 ; CHECK-NoAA: add i64 [[PTR2]], 23052
@@ -51,38 +51,35 @@ if.end:                                           ; preds = %if.then, %entry
 ; CHECK-NoAA: inttoptr
 
 ; CHECK-UseAA-LABEL: @test_GEP_CSE(
-; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
 ; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, i8* [[PTR0]], i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23052
-; CHECK-UseAA: bitcast
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
 ; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23048
-; CHECK-UseAA: bitcast
+; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
 
 %class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
-%struct.pt = type { %struct.point*, i32, i32 }
+%struct.pt = type { ptr, i32, i32 }
 %struct.point = type { i32, i32 }
 
 ; Check when a GEP is used across two basic block, LLVM can sink the address
 ; calculation and code gen can generate a better addressing mode for the second
 ; use.
-define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
-  %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
-  %2 = load i32, i32* %1, align 4
-  %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
-  %4 = load i32, i32* %3, align 4
+define void @test_GEP_across_BB(ptr %this, i64 %idx) {
+  %1 = getelementptr %class.my, ptr %this, i64 0, i32 3, i64 %idx, i32 1
+  %2 = load i32, ptr %1, align 4
+  %3 = getelementptr %class.my, ptr %this, i64 0, i32 3, i64 %idx, i32 2
+  %4 = load i32, ptr %3, align 4
   %5 = icmp eq i32 %2, %4
   br i1 %5, label %if.true, label %exit
 
 if.true:
   %6 = shl i32 %4, 1
-  store i32 %6, i32* %3, align 4
+  store i32 %6, ptr %3, align 4
   br label %exit
 
 exit:
   %7 = add nsw i32 %4, 1
-  store i32 %7, i32* %1, align 4
+  store i32 %7, ptr %1, align 4
   ret void
 }
 ; CHECK-LABEL: test_GEP_across_BB:
@@ -97,21 +94,19 @@ exit:
 ; CHECK-NoAA: add i64 [[TMP]], 532
 ; CHECK-NoAA: if.true:
 ; CHECK-NoAA: inttoptr
-; CHECK-NoAA: bitcast
 ; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, {{.*}}, i64 532
 ; CHECK-NoAA: exit:
 ; CHECK-NoAA: inttoptr
-; CHECK-NoAA: bitcast
 ; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, {{.*}}, i64 528
 
 ; CHECK-UseAA-LABEL: test_GEP_across_BB(
 ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
-; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 528
-; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 532
+; CHECK-UseAA: getelementptr i8, ptr [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8, ptr [[PTR0]], i64 532
 ; CHECK-UseAA: if.true:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 532
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, ptr [[PTR0]], i64 532
 ; CHECK-UseAA: exit:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 528
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, ptr [[PTR0]], i64 528
 
 %struct.S = type { float, double }
 @struct_array = global [1024 x %struct.S] zeroinitializer, align 16
@@ -121,49 +116,49 @@ exit:
 ; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the
 ; alloca size of %struct.S is 16, and "i32 1" is the 2rd element whose field
 ; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
-define double* @test-struct_1(i32 %i) {
+define ptr @test-struct_1(i32 %i) {
 entry:
   %add = add nsw i32 %i, 5
   %idxprom = sext i32 %add to i64
-  %p = getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
-  ret double* %p
+  %p = getelementptr [1024 x %struct.S], ptr @struct_array, i64 0, i64 %idxprom, i32 1
+  ret ptr %p
 }
 ; CHECK-NoAA-LABEL: @test-struct_1(
 ; CHECK-NoAA-NOT: getelementptr
 ; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
 
 ; CHECK-UseAA-LABEL: @test-struct_1(
-; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88
+; CHECK-UseAA: getelementptr i8, ptr %{{[a-zA-Z0-9]+}}, i64 88
 
 %struct3 = type { i64, i32 }
 %struct2 = type { %struct3, i32 }
 %struct1 = type { i64, %struct2 }
-%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+%struct0 = type { i32, i32, ptr, [100 x %struct1] }
 
 ; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1".
 ; "i32 3" is the 4th element whose field offset is 16. The alloca size of
 ; %struct1 is 32. "i32 1" is the 2rd element whose field offset is 8. So the
 ; total constant offset is 16 + (-2 * 32) + 8 = -40
-define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
+define ptr @test-struct_2(ptr %ptr, i64 %idx) {
 entry:
   %arrayidx = add nsw i64 %idx, -2
-  %ptr2 = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
-  ret %struct2* %ptr2
+  %ptr2 = getelementptr %struct0, ptr %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+  ret ptr %ptr2
 }
 ; CHECK-NoAA-LABEL: @test-struct_2(
 ; CHECK-NoAA-NOT: = getelementptr
 ; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
 
 ; CHECK-UseAA-LABEL: @test-struct_2(
-; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 -40
+; CHECK-UseAA: getelementptr i8, ptr %{{[a-zA-Z0-9]+}}, i64 -40
 
 ; Test that when a index is added from two constant, SeparateConstOffsetFromGEP
 ; pass does not generate incorrect result.
-define void @test_const_add([3 x i32]* %in) {
+define void @test_const_add(ptr %in) {
   %inc = add nsw i32 2, 1
   %idxprom = sext i32 %inc to i64
-  %arrayidx = getelementptr [3 x i32], [3 x i32]* %in, i64 %idxprom, i64 2
-  store i32 0, i32* %arrayidx, align 4
+  %arrayidx = getelementptr [3 x i32], ptr %in, i64 %idxprom, i64 2
+  store i32 0, ptr %arrayidx, align 4
   ret void
 }
 ; CHECK-LABEL: test_const_add:

diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
index 6ced29d9757d1..a5d94c13ef010 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-ld-combine.ll
@@ -6,14 +6,13 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "arm64--linux-gnu"
 
 ; This should be lowered into LD4
-define void @aarch64_ilc_const(<4 x float>* %ptr) {
+define void @aarch64_ilc_const(ptr %ptr) {
 entry:
 
 ;;; Check LLVM transformation
 ; CHECK-LABEL: @aarch64_ilc_const(
-; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
-; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
-; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16
+; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 2
+; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
@@ -25,14 +24,14 @@ entry:
 ; AS: ld4
 ; AS: ret
 
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  2
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  3
-  %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  4
-  %gep4 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  5
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
-  %ld3 = load <4 x float>, <4 x float>* %gep3, align 16
-  %ld4 = load <4 x float>, <4 x float>* %gep4, align 16
+  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64  2
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64  3
+  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64  4
+  %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64  5
+  %ld1 = load <4 x float>, ptr %gep1, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
+  %ld3 = load <4 x float>, ptr %gep3, align 16
+  %ld4 = load <4 x float>, ptr %gep4, align 16
   %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -42,24 +41,23 @@ entry:
   %m8_11  = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
-  store <4 x float> %m8_11, <4 x float>* %gep3, align 16
-  store <4 x float> %m12_15, <4 x float>* %gep4, align 16
+  store <4 x float> %m0_3, ptr %gep1, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
+  store <4 x float> %m8_11, ptr %gep3, align 16
+  store <4 x float> %m12_15, ptr %gep4, align 16
   ret void
 }
 
 ; This should be lowered into LD4
-define void @aarch64_ilc_idx(<4 x float>* %ptr, i64 %idx) {
+define void @aarch64_ilc_idx(ptr %ptr, i64 %idx) {
 entry:
 
 ;;; Check LLVM transformation
 ; CHECK-LABEL: @aarch64_ilc_idx(
 ; CHECK-DAG: [[ADD:%.+]] = add i64 %idx, 16
 ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 [[ADD]], 2
-; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
-; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
-; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 16
+; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
+; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
@@ -87,14 +85,14 @@ entry:
   %a4 = add i64 %idx, 28
   %idx4 = lshr i64 %a4, 2
 
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx2
-  %gep4 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx4
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx1
-  %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx3
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
-  %ld3 = load <4 x float>, <4 x float>* %gep3, align 16
-  %ld4 = load <4 x float>, <4 x float>* %gep4, align 16
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx2
+  %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx4
+  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx1
+  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx3
+  %ld1 = load <4 x float>, ptr %gep1, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
+  %ld3 = load <4 x float>, ptr %gep3, align 16
+  %ld4 = load <4 x float>, ptr %gep4, align 16
   %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -104,24 +102,23 @@ entry:
   %m8_11  = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
-  store <4 x float> %m8_11, <4 x float>* %gep3, align 16
-  store <4 x float> %m12_15, <4 x float>* %gep4, align 16
+  store <4 x float> %m0_3, ptr %gep1, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
+  store <4 x float> %m8_11, ptr %gep3, align 16
+  store <4 x float> %m12_15, ptr %gep4, align 16
   ret void
 }
 
 ; This should be lowered into LD4, a offset of has to be taken into account
 %struct.ilc = type <{ float, [0 x <4 x float>] }>
-define void @aarch64_ilc_struct(%struct.ilc* %ptr, i64 %idx) {
+define void @aarch64_ilc_struct(ptr %ptr, i64 %idx) {
 entry:
 
 ;;; Check LLVM transformation
 ; CHECK-LABEL: @aarch64_ilc_struct(
 ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
-; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 [[LSHR]]
-; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <16 x float>*
-; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, <16 x float>* [[CAST]], align 4
+; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 [[LSHR]]
+; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 4
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
 ; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
@@ -147,15 +144,15 @@ entry:
   %a3 = add i64 %idx, 12
   %idx4 = lshr i64 %a3, 2
 
-  %gep2 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx2
-  %gep3 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx3
-  %gep4 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx4
+  %gep2 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx2
+  %gep3 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx3
+  %gep4 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx4
   %idx1 = lshr i64 %idx, 2
-  %gep1 = getelementptr %struct.ilc, %struct.ilc* %ptr, i32 0, i32 1, i64 %idx1
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 4
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 4
-  %ld3 = load <4 x float>, <4 x float>* %gep3, align 4
-  %ld4 = load <4 x float>, <4 x float>* %gep4, align 4
+  %gep1 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx1
+  %ld1 = load <4 x float>, ptr %gep1, align 4
+  %ld2 = load <4 x float>, ptr %gep2, align 4
+  %ld3 = load <4 x float>, ptr %gep3, align 4
+  %ld4 = load <4 x float>, ptr %gep4, align 4
   %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -165,21 +162,20 @@ entry:
   %m8_11  = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
-  store <4 x float> %m8_11, <4 x float>* %gep3, align 16
-  store <4 x float> %m12_15, <4 x float>* %gep4, align 16
+  store <4 x float> %m0_3, ptr %gep1, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
+  store <4 x float> %m8_11, ptr %gep3, align 16
+  store <4 x float> %m12_15, ptr %gep4, align 16
   ret void
 }
 
 ; This should be lowered into LD2
-define void @aarch64_ilc_idx_ld2(<4 x float>* %ptr, i64 %idx) {
+define void @aarch64_ilc_idx_ld2(ptr %ptr, i64 %idx) {
 entry:
 ; CHECK-LABEL: @aarch64_ilc_idx_ld2(
 ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
-; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
-; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <8 x float>*
-; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, <8 x float>* [[CAST]], align 16
+; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
+; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, ptr [[GEP]], align 16
 ; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-DAG: ret void
@@ -192,26 +188,25 @@ entry:
   %a1 = add i64 %idx, 4
   %idx2 = lshr i64 %a1, 2
 
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx1
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx2
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx1
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx2
+  %ld1 = load <4 x float>, ptr %gep1, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
   %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1
-  store <4 x float> %m4_7, <4 x float>* %gep2
+  store <4 x float> %m0_3, ptr %gep1
+  store <4 x float> %m4_7, ptr %gep2
   ret void
 }
 
 ; This should be lowered into LD3
-define void @aarch64_ilc_idx_ld3(<4 x float>* %ptr, i64 %idx) {
+define void @aarch64_ilc_idx_ld3(ptr %ptr, i64 %idx) {
 entry:
 ; CHECK-LABEL: @aarch64_ilc_idx_ld3(
 ; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
-; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 [[LSHR]]
-; CHECK-DAG: [[CAST:%.+]] = bitcast <4 x float>* [[GEP]] to <12 x float>*
-; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, <12 x float>* [[CAST]], align 16
+; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
+; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, ptr [[GEP]], align 16
 ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
@@ -227,12 +222,12 @@ entry:
   %a2 = add i64 %idx, 8
   %idx3 = lshr i64 %a2, 2
 
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx1
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx2
-  %gep3 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64  %idx3
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
-  %ld3 = load <4 x float>, <4 x float>* %gep3, align 16
+  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx1
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx2
+  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64  %idx3
+  %ld1 = load <4 x float>, ptr %gep1, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
+  %ld3 = load <4 x float>, ptr %gep3, align 16
 
   %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 3, i32 6, i32 undef>
   %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 4, i32 7, i32 undef>
@@ -241,27 +236,27 @@ entry:
   %m4_7 = shufflevector <4 x float> %sv2, <4 x float> %ld3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
   %m8_11 = shufflevector <4 x float> %sv3, <4 x float> %ld3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
-  store <4 x float> %m8_11, <4 x float>* %gep3, align 16
+  store <4 x float> %m0_3, ptr %gep1, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
+  store <4 x float> %m8_11, ptr %gep3, align 16
   ret void
 }
 ;  %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 undef, i32 4, i32 undef>
 
 ; This must not be lowered
-define void @aarch64_ilc_i32_idx(<4 x float>* %ptr, i32 %idx) {
+define void @aarch64_ilc_i32_idx(ptr %ptr, i32 %idx) {
 ; CHECK-LABEL: @aarch64_ilc_i32_idx(
 ; CHECK: %idx1 = lshr i32 %idx, 2
 ; CHECK-NEXT: %a1 = add i32 %idx, 4
 ; CHECK-NEXT: %idx2 = lshr i32 %a1, 2
-; CHECK-NEXT: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx1
-; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx2
-; CHECK-NEXT: %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+; CHECK-NEXT: %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1
+; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2
+; CHECK-NEXT: %ld1 = load <4 x float>, ptr %gep1, align 16
+; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+; CHECK-NEXT: store <4 x float> %m0_3, ptr %gep1, align 16
+; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
 ; CHECK-NEXT: ret void
 
 ; AS-LABEL: aarch64_ilc_i32_idx
@@ -276,29 +271,28 @@ entry:
   %a1 = add i32 %idx, 4
   %idx2 = lshr i32 %a1, 2
 
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx1
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 %idx2
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2
+  %ld1 = load <4 x float>, ptr %gep1, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
   %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+  store <4 x float> %m0_3, ptr %gep1, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
   ret void
 }
 
 ; Volatile loads must not be lowered
-define void @aarch64_ilc_volatile(<4 x float>* %ptr) {
+define void @aarch64_ilc_volatile(ptr %ptr) {
 ; CHECK-LABEL: @aarch64_ilc_volatile(
-; CHECK: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0
-; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1
-; CHECK-NEXT: %ld1 = load volatile <4 x float>, <4 x float>* %gep1, align 16
-; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
+; CHECK-NEXT: %ld1 = load volatile <4 x float>, ptr %ptr, align 16
+; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16
+; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
 ; CHECK-NEXT: ret void
 
 ; AS-LABEL: aarch64_ilc_volatile
@@ -309,30 +303,28 @@ define void @aarch64_ilc_volatile(<4 x float>* %ptr) {
 ; AS-DAG: ret
 
 entry:
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1
-  %ld1 = load volatile <4 x float>, <4 x float>* %gep1, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
+  %ld1 = load volatile <4 x float>, ptr %ptr, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
   %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+  store <4 x float> %m0_3, ptr %ptr, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
   ret void
 }
 
 ; This must not be lowered
-define void @aarch64_ilc_depmem(<4 x float>* %ptr, i32 %idx) {
+define void @aarch64_ilc_depmem(ptr %ptr, i32 %idx) {
 entry:
 ; CHECK-LABEL: @aarch64_ilc_depmem(
-; CHECK: %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0
-; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1
-; CHECK-NEXT: %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-; CHECK-NEXT: store <4 x float> %ld1, <4 x float>* %gep2, align 16
-; CHECK-NEXT: %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
+; CHECK-NEXT: %ld1 = load <4 x float>, ptr %ptr, align 16
+; CHECK-NEXT: store <4 x float> %ld1, ptr %gep2, align 16
+; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
 ; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-; CHECK-NEXT: store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16
+; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
 ; CHECK-NEXT: ret void
 
 ; AS-LABEL: aarch64_ilc_depmem
@@ -342,39 +334,32 @@ entry:
 ; AS-NOT: ld4
 ; AS-DAG: ret
 
-  %gep1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 0
-  %gep2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i32 1
-  %ld1 = load <4 x float>, <4 x float>* %gep1, align 16
-  store <4 x float> %ld1, <4 x float>* %gep2, align 16
-  %ld2 = load <4 x float>, <4 x float>* %gep2, align 16
+  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
+  %ld1 = load <4 x float>, ptr %ptr, align 16
+  store <4 x float> %ld1, ptr %gep2, align 16
+  %ld2 = load <4 x float>, ptr %gep2, align 16
   %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 
-  store <4 x float> %m0_3, <4 x float>* %gep1, align 16
-  store <4 x float> %m4_7, <4 x float>* %gep2, align 16
+  store <4 x float> %m0_3, ptr %ptr, align 16
+  store <4 x float> %m4_7, ptr %gep2, align 16
   ret void
 }
 
 ; This cannot be converted - insertion position cannot be determined
-define void @aarch64_no_insertion_pos(float* %ptr) {
+define void @aarch64_no_insertion_pos(ptr %ptr) {
 entry:
 ; CHECK-LABEL: @aarch64_no_insertion_pos(
-; CHECK: %p0 = getelementptr inbounds float, float* %ptr, i32 0
-; CHECK-NEXT: %p1 = getelementptr inbounds float, float* %ptr, i32 4
-; CHECK-NEXT: %b0 = bitcast float* %p0 to <5 x float>*
-; CHECK-NEXT: %b1 = bitcast float* %p1 to <5 x float>*
-; CHECK-NEXT: %l0 = load <5 x float>, <5 x float>* %b0
-; CHECK-NEXT: %l1 = load <5 x float>, <5 x float>* %b1
+; CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 4
+; CHECK-NEXT: %l0 = load <5 x float>, ptr %ptr
+; CHECK-NEXT: %l1 = load <5 x float>, ptr %p1
 ; CHECK-NEXT: %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 1, i32 3, i32 6, i32 8>
 ; CHECK-NEXT: %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 2, i32 4, i32 7, i32 9>
 ; CHECK-NEXT: ret void
 
-  %p0 = getelementptr inbounds float, float* %ptr, i32 0
-  %p1 = getelementptr inbounds float, float* %ptr, i32 4
-  %b0 = bitcast float* %p0 to <5 x float>*
-  %b1 = bitcast float* %p1 to <5 x float>*
-  %l0 = load <5 x float>, <5 x float>* %b0
-  %l1 = load <5 x float>, <5 x float>* %b1
+  %p1 = getelementptr inbounds float, ptr %ptr, i32 4
+  %l0 = load <5 x float>, ptr %ptr
+  %l1 = load <5 x float>, ptr %p1
   %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 1, i32 3, i32 6, i32 8>
   %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 2, i32 4, i32 7, i32 9>
   ret void
@@ -382,25 +367,19 @@ entry:
 
 ; This cannot be converted - the insertion position does not dominate all
 ; uses
-define void @aarch64_insertpos_does_not_dominate(float* %ptr) {
+define void @aarch64_insertpos_does_not_dominate(ptr %ptr) {
 entry:
 ; CHECK-LABEL: @aarch64_insertpos_does_not_dominate(
-; CHECK: %p0 = getelementptr inbounds float, float* %ptr, i32 0
-; CHECK-NEXT: %p1 = getelementptr inbounds float, float* %ptr, i32 1
-; CHECK-NEXT: %b0 = bitcast float* %p0 to <7 x float>*
-; CHECK-NEXT: %b1 = bitcast float* %p1 to <7 x float>*
-; CHECK-NEXT: %l1 = load <7 x float>, <7 x float>* %b1
+; CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 1
+; CHECK-NEXT: %l1 = load <7 x float>, ptr %p1
 ; CHECK-NEXT: %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: %l0 = load <7 x float>, <7 x float>* %b0
+; CHECK-NEXT: %l0 = load <7 x float>, ptr %ptr
 ; CHECK-NEXT: %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT: ret void
-  %p0 = getelementptr inbounds float, float* %ptr, i32 0
-  %p1 = getelementptr inbounds float, float* %ptr, i32 1
-  %b0 = bitcast float* %p0 to <7 x float>*
-  %b1 = bitcast float* %p1 to <7 x float>*
-  %l1 = load <7 x float>, <7 x float>* %b1
+  %p1 = getelementptr inbounds float, ptr %ptr, i32 1
+  %l1 = load <7 x float>, ptr %p1
   %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-  %l0 = load <7 x float>, <7 x float>* %b0
+  %l0 = load <7 x float>, ptr %ptr
   %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   ret void
 }

diff --git a/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
index 1b2ed4b89521b..c32e0b9b3e7aa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
@@ -5,23 +5,21 @@ target triple = "aarch64--linux-android"
 %typeD = type { i32, i32, [256 x i32], [257 x i32] }
 
 ; Function Attrs: noreturn nounwind uwtable
-define i32 @test1(%typeD* nocapture %s) {
+define i32 @test1(ptr nocapture %s) {
 entry:
 ; CHECK-LABEL: entry:
-; CHECK:    %uglygep = getelementptr i8, i8* %0, i64 1032
+; CHECK:    %uglygep = getelementptr i8, ptr %s, i64 1032
 ; CHECK:    br label %do.body.i
 
 
-  %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0
-  %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1
-  %.pre = load i32, i32* %tPos, align 4
+  %k0 = getelementptr inbounds %typeD, ptr %s, i64 0, i32 1
+  %.pre = load i32, ptr %s, align 4
   br label %do.body.i
 
 do.body.i:
 ; CHECK-LABEL: do.body.i:
-; CHECK:          %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
-; CHECK-NEXT:     %4 = bitcast i8* %uglygep2 to i32*
-; CHECK-NOT:      %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
+; CHECK:          %uglygep2 = getelementptr i8, ptr %uglygep, i64 %2
+; CHECK-NOT:      %uglygep2 = getelementptr i8, ptr %uglygep, i64 1032
 
 
   %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
@@ -29,8 +27,8 @@ do.body.i:
   %add.i = add nsw i32 %1, %0
   %shr.i = ashr i32 %add.i, 1
   %idxprom.i = sext i32 %shr.i to i64
-  %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i
-  %2 = load i32, i32* %arrayidx.i, align 4
+  %arrayidx.i = getelementptr inbounds %typeD, ptr %s, i64 0, i32 3, i64 %idxprom.i
+  %2 = load i32, ptr %arrayidx.i, align 4
   %cmp.i = icmp sle i32 %2, %.pre
   %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i
   %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1
@@ -44,7 +42,7 @@ do.body.i.backedge:
   br label %do.body.i
 
 fooo.exit:                              ; preds = %do.body.i
-  store i32 %nb.1.i, i32* %k0, align 4
+  store i32 %nb.1.i, ptr %k0, align 4
   br label %do.body.i.backedge
 }
 

diff --git a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
index 8ae00a2fc9751..a5a421e0955f2 100644
--- a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
+++ b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
@@ -7,7 +7,7 @@
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64-none-linux-gnu"
 
-  define void @ham(i8 addrspace(1)* %arg) gc "statepoint-example" {
+  define void @ham(ptr addrspace(1) %arg) gc "statepoint-example" {
   bb:
     br i1 undef, label %bb27.preheader, label %bb23
 
@@ -66,7 +66,7 @@
     unreachable
   }
 
-  declare i8 addrspace(1)* @bar(i64, i64, i64, i32*)
+  declare ptr addrspace(1) @bar(i64, i64, i64, ptr)
   declare void @wombat()
   declare void @blam.1()
   declare void @blam(i32)
@@ -303,7 +303,7 @@ body:             |
   ; CHECK-NEXT: bb.1.bb27.preheader:
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $x24 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK-NEXT:   renamable $x24 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   renamable $w21 = MOVi32imm -8280
   ; CHECK-NEXT:   renamable $w23 = MOVi32imm -6
   ; CHECK-NEXT:   renamable $w25 = MOVi32imm 3, implicit-def $x25
@@ -479,13 +479,13 @@ body:             |
   ; CHECK-NEXT:   liveins: $fp, $w20, $w23, $x10, $x19, $x22, $x24, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = COPY $xzr
-  ; CHECK-NEXT:   renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `i32 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1)
   ; CHECK-NEXT:   renamable $w9 = MADDWrrr killed renamable $w9, renamable $w10, $wzr
   ; CHECK-NEXT:   renamable $w23 = nsw SUBWri killed renamable $w23, 2, 0
   ; CHECK-NEXT:   dead $xzr = SUBSXri killed renamable $x25, 107, 0, implicit-def $nzcv
   ; CHECK-NEXT:   renamable $x25 = COPY killed renamable $fp
   ; CHECK-NEXT:   renamable $w21 = MOVi32imm 2
-  ; CHECK-NEXT:   STRWui killed renamable $w9, killed renamable $x8, 0 :: (store unordered (s32) into `i32 addrspace(1)* null`, addrspace 1)
+  ; CHECK-NEXT:   STRWui killed renamable $w9, killed renamable $x8, 0 :: (store unordered (s32) into `ptr addrspace(1) null`, addrspace 1)
   ; CHECK-NEXT:   Bcc 8, %bb.16, implicit killed $nzcv
   ; CHECK-NEXT:   B %bb.3
   ; CHECK-NEXT: {{  $}}
@@ -532,7 +532,7 @@ body:             |
   bb.1.bb27.preheader:
     successors: %bb.3(0x80000000)
 
-    %74:gpr64 = LDRXui undef %75:gpr64sp, 0 :: (load unordered (s64) from `i64 addrspace(1)* undef`, addrspace 1)
+    %74:gpr64 = LDRXui undef %75:gpr64sp, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1)
     %13:gpr32 = MOVi32imm -8280
     %130:gpr32common = MOVi32imm -6
     undef %129.sub_32:gpr64common = MOVi32imm 3
@@ -671,13 +671,13 @@ body:             |
     successors: %bb.14(0x00000000), %bb.3(0x80000000)
 
     %115:gpr64sp = COPY $xzr
-    %116:gpr32 = LDRWui %115, 0 :: (load unordered (s32) from `i32 addrspace(1)* null`, addrspace 1)
+    %116:gpr32 = LDRWui %115, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1)
     %117:gpr32 = MADDWrrr %116, %42.sub_32, $wzr
     %130:gpr32common = nsw SUBWri %130, 2, 0
     dead $xzr = SUBSXri %129, 107, 0, implicit-def $nzcv
     %129:gpr64common = COPY %14
     %13:gpr32 = MOVi32imm 2
-    STRWui %117, %115, 0 :: (store unordered (s32) into `i32 addrspace(1)* null`, addrspace 1)
+    STRWui %117, %115, 0 :: (store unordered (s32) into `ptr addrspace(1) null`, addrspace 1)
     Bcc 8, %bb.14, implicit killed $nzcv
     B %bb.3
 

diff --git a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll
index 77f1747ea9cfc..e6ab52dc9e619 100644
--- a/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/scalable-vector-promotion.ll
@@ -6,18 +6,18 @@
 ; will assert once target lowering is ready, then we can bring in implementation for non-splat
 ; codepath for scalable vector.
 
-define void @simpleOneInstructionPromotion(<vscale x 2 x i32>* %addr1, i32* %dest) {
+define void @simpleOneInstructionPromotion(ptr %addr1, ptr %dest) {
 ; CHECK-LABEL: @simpleOneInstructionPromotion(
-; CHECK-NEXT:    [[IN1:%.*]] = load <vscale x 2 x i32>, <vscale x 2 x i32>* [[ADDR1:%.*]], align 8
+; CHECK-NEXT:    [[IN1:%.*]] = load <vscale x 2 x i32>, ptr [[ADDR1:%.*]], align 8
 ; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <vscale x 2 x i32> [[IN1]], i32 1
 ; CHECK-NEXT:    [[OUT:%.*]] = or i32 [[EXTRACT]], 1
-; CHECK-NEXT:    store i32 [[OUT]], i32* [[DEST:%.*]], align 4
+; CHECK-NEXT:    store i32 [[OUT]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %in1 = load <vscale x 2 x i32>, <vscale x 2 x i32>* %addr1, align 8
+  %in1 = load <vscale x 2 x i32>, ptr %addr1, align 8
   %extract = extractelement <vscale x 2 x i32> %in1, i32 1
   %out = or i32 %extract, 1
-  store i32 %out, i32* %dest, align 4
+  store i32 %out, ptr %dest, align 4
   ret void
 }
 

diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
index 11389d5d5ba7c..719379b962ca4 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-ex-2.ll
@@ -29,139 +29,133 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-unknown-eabi"
 
-@_ZTIi = external dso_local constant i8*
+@_ZTIi = external dso_local constant ptr
 
 ; Function Attrs: noreturn sanitize_memtag
 define dso_local void @_Z3barv() local_unnamed_addr #0 {
 entry:
-  %exception = tail call i8* @__cxa_allocate_exception(i64 4) #4
-  %0 = bitcast i8* %exception to i32*
-  store i32 42, i32* %0, align 16, !tbaa !2
-  tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  %exception = tail call ptr @__cxa_allocate_exception(i64 4) #4
+  store i32 42, ptr %exception, align 16, !tbaa !2
+  tail call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5
   unreachable
 }
 
-declare dso_local i8* @__cxa_allocate_exception(i64) local_unnamed_addr
+declare dso_local ptr @__cxa_allocate_exception(i64) local_unnamed_addr
 
-declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr
+declare dso_local void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
 
 ; Function Attrs: noreturn sanitize_memtag
-define dso_local void @_Z3foov() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define dso_local void @_Z3foov() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 {
 entry:
   %A0 = alloca i32, align 4
-  %0 = bitcast i32* %A0 to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
-  call void asm sideeffect "", "r"(i32* nonnull %A0) #4, !srcloc !6
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %A0) #4
+  call void asm sideeffect "", "r"(ptr nonnull %A0) #4, !srcloc !6
   invoke void @_Z3barv()
           to label %try.cont unwind label %lpad
 
 lpad:                                             ; preds = %entry
-  %1 = landingpad { i8*, i32 }
+  %0 = landingpad { ptr, i32 }
           cleanup
-          catch i8* bitcast (i8** @_ZTIi to i8*)
-  %2 = extractvalue { i8*, i32 } %1, 1
-  %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
-  %matches = icmp eq i32 %2, %3
+          catch ptr @_ZTIi
+  %1 = extractvalue { ptr, i32 } %0, 1
+  %2 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #4
+  %matches = icmp eq i32 %1, %2
   br i1 %matches, label %catch, label %ehcleanup
 
 catch:                                            ; preds = %lpad
-  %4 = extractvalue { i8*, i32 } %1, 0
-  %5 = call i8* @__cxa_begin_catch(i8* %4) #4
+  %3 = extractvalue { ptr, i32 } %0, 0
+  %4 = call ptr @__cxa_begin_catch(ptr %3) #4
   call void @__cxa_end_catch() #4
   br label %try.cont
 
 try.cont:                                         ; preds = %entry, %catch
-  %exception = call i8* @__cxa_allocate_exception(i64 4) #4
-  %6 = bitcast i8* %exception to i32*
-  store i32 15532, i32* %6, align 16, !tbaa !2
-  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  %exception = call ptr @__cxa_allocate_exception(i64 4) #4
+  store i32 15532, ptr %exception, align 16, !tbaa !2
+  call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5
   unreachable
 
 ehcleanup:                                        ; preds = %lpad
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
-  resume { i8*, i32 } %1
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %A0) #4
+  resume { ptr, i32 } %0
 }
 
 ; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
 
 declare dso_local i32 @__gxx_personality_v0(...)
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
+declare i32 @llvm.eh.typeid.for(ptr) #2
 
-declare dso_local i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+declare dso_local ptr @__cxa_begin_catch(ptr) local_unnamed_addr
 
 declare dso_local void @__cxa_end_catch() local_unnamed_addr
 
 ; Function Attrs: argmemonly nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
 
 ; Function Attrs: norecurse sanitize_memtag
-define dso_local i32 @main() local_unnamed_addr #3 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define dso_local i32 @main() local_unnamed_addr #3 personality ptr @__gxx_personality_v0 {
 entry:
 ; CHECK-LABEL: entry:
   %A0.i = alloca i32, align 4
-  %0 = bitcast i32* %A0.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
-  call void asm sideeffect "", "r"(i32* nonnull %A0.i) #4, !srcloc !6
-; CHECK: call void @llvm.aarch64.settag(i8* %1, i64 16)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %A0.i) #4
+  call void asm sideeffect "", "r"(ptr nonnull %A0.i) #4, !srcloc !6
+; CHECK: call void @llvm.aarch64.settag(ptr %A0.i.tag, i64 16)
 ; CHECK-NEXT: call void asm sideeffect
-  %exception.i6 = call i8* @__cxa_allocate_exception(i64 4) #4
-  %1 = bitcast i8* %exception.i6 to i32*
-  store i32 42, i32* %1, align 16, !tbaa !2
-  invoke void @__cxa_throw(i8* %exception.i6, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  %exception.i6 = call ptr @__cxa_allocate_exception(i64 4) #4
+  store i32 42, ptr %exception.i6, align 16, !tbaa !2
+  invoke void @__cxa_throw(ptr %exception.i6, ptr @_ZTIi, ptr null) #5
           to label %.noexc7 unwind label %lpad.i
 
 .noexc7:                                          ; preds = %entry
   unreachable
 
 lpad.i:                                           ; preds = %entry
-  %2 = landingpad { i8*, i32 }
+  %0 = landingpad { ptr, i32 }
           cleanup
-          catch i8* bitcast (i8** @_ZTIi to i8*)
-  %3 = extractvalue { i8*, i32 } %2, 1
-  %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
-  %matches.i = icmp eq i32 %3, %4
+          catch ptr @_ZTIi
+  %1 = extractvalue { ptr, i32 } %0, 1
+  %2 = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #4
+  %matches.i = icmp eq i32 %1, %2
   br i1 %matches.i, label %catch.i, label %ehcleanup.i
 
 catch.i:                                          ; preds = %lpad.i
-  %5 = extractvalue { i8*, i32 } %2, 0
-  %6 = call i8* @__cxa_begin_catch(i8* %5) #4
+  %3 = extractvalue { ptr, i32 } %0, 0
+  %4 = call ptr @__cxa_begin_catch(ptr %3) #4
   call void @__cxa_end_catch() #4
-  %exception.i = call i8* @__cxa_allocate_exception(i64 4) #4
-  %7 = bitcast i8* %exception.i to i32*
-  store i32 15532, i32* %7, align 16, !tbaa !2
-  invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #5
+  %exception.i = call ptr @__cxa_allocate_exception(i64 4) #4
+  store i32 15532, ptr %exception.i, align 16, !tbaa !2
+  invoke void @__cxa_throw(ptr %exception.i, ptr @_ZTIi, ptr null) #5
           to label %.noexc unwind label %lpad
 
 .noexc:                                           ; preds = %catch.i
   unreachable
 
 ehcleanup.i:                                      ; preds = %lpad.i
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %A0.i) #4
   br label %lpad.body
 
 lpad:                                             ; preds = %catch.i
-  %8 = landingpad { i8*, i32 }
-          catch i8* bitcast (i8** @_ZTIi to i8*)
-  %.pre = extractvalue { i8*, i32 } %8, 1
+  %5 = landingpad { ptr, i32 }
+          catch ptr @_ZTIi
+  %.pre = extractvalue { ptr, i32 } %5, 1
   br label %lpad.body
 
 lpad.body:                                        ; preds = %ehcleanup.i, %lpad
-  %.pre-phi = phi i32 [ %3, %ehcleanup.i ], [ %.pre, %lpad ]
-  %eh.lpad-body = phi { i8*, i32 } [ %2, %ehcleanup.i ], [ %8, %lpad ]
-  %matches = icmp eq i32 %.pre-phi, %4
+  %.pre-phi = phi i32 [ %1, %ehcleanup.i ], [ %.pre, %lpad ]
+  %eh.lpad-body = phi { ptr, i32 } [ %0, %ehcleanup.i ], [ %5, %lpad ]
+  %matches = icmp eq i32 %.pre-phi, %2
   br i1 %matches, label %catch, label %eh.resume
 
 catch:                                            ; preds = %lpad.body
-  %9 = extractvalue { i8*, i32 } %eh.lpad-body, 0
-  %10 = call i8* @__cxa_begin_catch(i8* %9) #4
+  %6 = extractvalue { ptr, i32 } %eh.lpad-body, 0
+  %7 = call ptr @__cxa_begin_catch(ptr %6) #4
   call void @__cxa_end_catch() #4
   ret i32 0
 
 eh.resume:                                        ; preds = %lpad.body
-  resume { i8*, i32 } %eh.lpad-body
+  resume { ptr, i32 } %eh.lpad-body
 }
 
 attributes #0 = { noreturn sanitize_memtag "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+mte,+neon,+v8.5a" "unsafe-fp-math"="false" "use-soft-float"="false" }

diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
index f6081926743f4..e449faba048d4 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll
@@ -3,48 +3,42 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android"
 
-declare void @use(i8*)
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+declare void @use(ptr)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 define void @OneVarNoInit() sanitize_memtag {
 entry:
   %x = alloca i32, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @OneVarNoInit(
 ; CHECK-DAG:  [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
-; CHECK-DAG:  [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* [[X]], {{.*}}, i64 0)
-; CHECK-DAG:  [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
-; CHECK-DAG:  [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
-; CHECK-DAG:  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TX8]])
-; CHECK-DAG:  call void @llvm.aarch64.settag(i8* [[TX8]], i64 16)
-; CHECK-DAG:  call void @use(i8* nonnull [[TX8]])
-; CHECK-DAG:  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TX8]])
+; CHECK-DAG:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], {{.*}}, i64 0)
+; CHECK-DAG:  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TX]])
+; CHECK-DAG:  call void @llvm.aarch64.settag(ptr [[TX]], i64 16)
+; CHECK-DAG:  call void @use(ptr nonnull [[TX]])
+; CHECK-DAG:  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TX]])
 
 define void @OneVarInitConst() sanitize_memtag {
 entry:
   %x = alloca i32, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
-  store i32 42, i32* %x, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+  store i32 42, ptr %x, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @OneVarInitConst(
-; CHECK:  [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
-; CHECK:  [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
 ; CHECK-NOT: aarch64.settag
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 42, i64 0)
 ; Untagging before lifetime.end:
 ; CHECK:  call void @llvm.aarch64.settag(
 ; CHECK-NOT: aarch64.settag
@@ -53,168 +47,149 @@ entry:
 define void @ArrayInitConst() sanitize_memtag {
 entry:
   %x = alloca i32, i32 16, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
-  store i32 42, i32* %x, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x)
+  store i32 42, ptr %x, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @ArrayInitConst(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
-; CHECK:  [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 42, i64 0)
+; CHECK:  [[TX8_16:%.*]] = getelementptr i8, ptr [[TX]], i32 16
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX8_16]], i64 48)
 ; CHECK:  ret void
 
 define void @ArrayInitConst2() sanitize_memtag {
 entry:
   %x = alloca i32, i32 16, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
-  store i32 42, i32* %x, align 4
-  %1 = getelementptr i32, i32* %x, i32 1
-  store i32 43, i32* %1, align 4
-  %2 = getelementptr i32, i32* %x, i32 2
-  %3 = bitcast i32* %2 to i64*
-  store i64 -1, i64* %3, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x)
+  store i32 42, ptr %x, align 4
+  %0 = getelementptr i32, ptr %x, i32 1
+  store i32 43, ptr %0, align 4
+  %1 = getelementptr i32, ptr %x, i32 2
+  store i64 -1, ptr %1, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @ArrayInitConst2(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 184683593770, i64 -1)
-; CHECK:  [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 184683593770, i64 -1)
+; CHECK:  [[TX8_16:%.*]] = getelementptr i8, ptr [[TX]], i32 16
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX8_16]], i64 48)
 ; CHECK:  ret void
 
 define void @ArrayInitConstSplit() sanitize_memtag {
 entry:
   %x = alloca i32, i32 16, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
-  %1 = getelementptr i32, i32* %x, i32 1
-  %2 = bitcast i32* %1 to i64*
-  store i64 -1, i64* %2, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %x)
+  %0 = getelementptr i32, ptr %x, i32 1
+  store i64 -1, ptr %0, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @ArrayInitConstSplit(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 -4294967296, i64 4294967295)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 -4294967296, i64 4294967295)
 ; CHECK:  ret void
 
 define void @ArrayInitConstWithHoles() sanitize_memtag {
 entry:
   %x = alloca i32, i32 32, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %0)
-  %1 = getelementptr i32, i32* %x, i32 5
-  store i32 42, i32* %1, align 4
-  %2 = getelementptr i32, i32* %x, i32 14
-  store i32 43, i32* %2, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %x)
+  %0 = getelementptr i32, ptr %x, i32 5
+  store i32 42, ptr %0, align 4
+  %1 = getelementptr i32, ptr %x, i32 14
+  store i32 43, ptr %1, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @ArrayInitConstWithHoles(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 16)
-; CHECK:  [[TX8_16:%.*]] = getelementptr i8, i8* %0, i32 16
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8_16]], i64 180388626432, i64 0)
-; CHECK:  [[TX8_32:%.*]] = getelementptr i8, i8* %0, i32 32
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8_32]], i64 16)
-; CHECK:  [[TX8_48:%.*]] = getelementptr i8, i8* %0, i32 48
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8_48]], i64 0, i64 43)
-; CHECK:  [[TX8_64:%.*]] = getelementptr i8, i8* %0, i32 64
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8_64]], i64 64)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp.
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX]], i64 16)
+; CHECK:  [[TX8_16:%.*]] = getelementptr i8, ptr %x.tag, i32 16
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX8_16]], i64 180388626432, i64 0)
+; CHECK:  [[TX8_32:%.*]] = getelementptr i8, ptr %x.tag, i32 32
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX8_32]], i64 16)
+; CHECK:  [[TX8_48:%.*]] = getelementptr i8, ptr %x.tag, i32 48
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX8_48]], i64 0, i64 43)
+; CHECK:  [[TX8_64:%.*]] = getelementptr i8, ptr %x.tag, i32 64
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX8_64]], i64 64)
 ; CHECK:  ret void
 
 define void @InitNonConst(i32 %v) sanitize_memtag {
 entry:
   %x = alloca i32, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
-  store i32 %v, i32* %x, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
+  store i32 %v, ptr %x, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @InitNonConst(
-; CHECK:  [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
-; CHECK:  [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
 ; CHECK:  [[V:%.*]] = zext i32 %v to i64
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[V]], i64 0)
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 [[V]], i64 0)
 ; CHECK:  ret void
 
 define void @InitNonConst2(i32 %v, i32 %w) sanitize_memtag {
 entry:
   %x = alloca i32, i32 4, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
-  store i32 %v, i32* %x, align 4
-  %1 = getelementptr i32, i32* %x, i32 1
-  store i32 %w, i32* %1, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x)
+  store i32 %v, ptr %x, align 4
+  %0 = getelementptr i32, ptr %x, i32 1
+  store i32 %w, ptr %0, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @InitNonConst2(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
 ; CHECK:  [[V:%.*]] = zext i32 %v to i64
 ; CHECK:  [[W:%.*]] = zext i32 %w to i64
 ; CHECK:  [[WS:%.*]] = shl i64 [[W]], 32
 ; CHECK:  [[VW:%.*]] = or i64 [[V]], [[WS]]
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[VW]], i64 0)
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 [[VW]], i64 0)
 ; CHECK:  ret void
 
 define void @InitVector() sanitize_memtag {
 entry:
   %x = alloca i32, i32 4, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
-  %1 = bitcast i32* %x to <2 x i32>*
-  store <2 x i32> <i32 1, i32 2>, <2 x i32>* %1, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x)
+  store <2 x i32> <i32 1, i32 2>, ptr %x, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @InitVector(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 0)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 0)
 ; CHECK:  ret void
 
-define void @InitVectorPtr(i32* %p) sanitize_memtag {
+define void @InitVectorPtr(ptr %p) sanitize_memtag {
 entry:
-  %s = alloca <4 x i32*>, align 8
-  %v0 = insertelement <4 x i32*> undef, i32* %p, i32 0
-  %v1 = shufflevector <4 x i32*> %v0, <4 x i32*> undef, <4 x i32> zeroinitializer
-  store <4 x i32*> %v1, <4 x i32*>* %s
-  %0 = bitcast <4 x i32*>* %s to i8*
-  call void @use(i8* nonnull %0)
+  %s = alloca <4 x ptr>, align 8
+  %v0 = insertelement <4 x ptr> undef, ptr %p, i32 0
+  %v1 = shufflevector <4 x ptr> %v0, <4 x ptr> undef, <4 x i32> zeroinitializer
+  store <4 x ptr> %v1, ptr %s
+  call void @use(ptr nonnull %s)
   ret void
 }
 
 ; CHECK-LABEL: define void @InitVectorPtr(
-; CHECK:  call <4 x i32*>* @llvm.aarch64.tagp
+; CHECK:  call ptr @llvm.aarch64.tagp
 ; CHECK:  [[V1:%.*]] = shufflevector
-; CHECK:  [[V2:%.*]] = ptrtoint <4 x i32*> [[V1]] to <4 x i64>
+; CHECK:  [[V2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64>
 ; CHECK:  [[V3:%.*]] = bitcast <4 x i64> [[V2]] to i256
 ; CHECK:  [[A1:%.*]] = trunc i256 [[V3]] to i64
 ; CHECK:  [[A2_:%.*]] = lshr i256 [[V3]], 64
@@ -230,93 +205,86 @@ entry:
 define void @InitVectorSplit() sanitize_memtag {
 entry:
   %x = alloca i32, i32 4, align 4
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
-  %1 = getelementptr i32, i32* %x, i32 1
-  %2 = bitcast i32* %1 to <2 x i32>*
-  store <2 x i32> <i32 1, i32 2>, <2 x i32>* %2, align 4
-  call void @use(i8* nonnull %0)
-  call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %x)
+  %0 = getelementptr i32, ptr %x, i32 1
+  store <2 x i32> <i32 1, i32 2>, ptr %0, align 4
+  call void @use(ptr nonnull %x)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @InitVectorSplit(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.stgp(i8* [[TX8]], i64 shl (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32))
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
+; CHECK:  call void @llvm.aarch64.stgp(ptr [[TX]], i64 shl (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32))
 ; CHECK:  ret void
 
 define void @MemSetZero() sanitize_memtag {
 entry:
   %x = alloca i32, i32 8, align 16
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 0, i64 32, i1 false)
-  call void @use(i8* nonnull %0)
+  call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 0, i64 32, i1 false)
+  call void @use(ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @MemSetZero(
-; CHECK:  [[TX:%.*]] = call i32* @llvm.aarch64.tagp
-; CHECK:  [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
-; CHECK:  call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 32)
+; CHECK:  [[TX:%.*]] = call ptr @llvm.aarch64.tagp
+; CHECK:  call void @llvm.aarch64.settag.zero(ptr [[TX]], i64 32)
 ; CHECK:  ret void
 
 
 define void @MemSetNonZero() sanitize_memtag {
 entry:
   %x = alloca i32, i32 8, align 16
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 32, i1 false)
-  call void @use(i8* nonnull %0)
+  call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 42, i64 32, i1 false)
+  call void @use(ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @MemSetNonZero(
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
 ; CHECK:  ret void
 
 
 define void @MemSetNonZero2() sanitize_memtag {
 entry:
   %x = alloca [32 x i8], align 16
-  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
-  call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 28, i1 false)
-  call void @use(i8* nonnull %0)
+  %0 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 2
+  call void @llvm.memset.p0.i64(ptr nonnull %0, i8 42, i64 28, i1 false)
+  call void @use(ptr nonnull %0)
   ret void
 }
 
 ; CHECK-LABEL: define void @MemSetNonZero2(
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199209472, i64 3038287259199220266)
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 46360584399402)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199209472, i64 3038287259199220266)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 3038287259199220266, i64 46360584399402)
 ; CHECK:  ret void
 
 define void @MemSetNonZero3() sanitize_memtag {
 entry:
   %x = alloca [32 x i8], align 16
-  %0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
-  call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 4, i1 false)
-  %1 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 24
-  call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 42, i64 8, i1 false)
-  call void @use(i8* nonnull %0)
+  %0 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 2
+  call void @llvm.memset.p0.i64(ptr nonnull %0, i8 42, i64 4, i1 false)
+  %1 = getelementptr inbounds [32 x i8], ptr %x, i64 0, i64 24
+  call void @llvm.memset.p0.i64(ptr nonnull %1, i8 42, i64 8, i1 false)
+  call void @use(ptr nonnull %0)
   ret void
 }
 
 ; CHECK-LABEL: define void @MemSetNonZero3(
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 46360584388608, i64 0)
-; CHECK:  call void @llvm.aarch64.stgp(i8* {{.*}}, i64 0, i64 3038287259199220266)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 46360584388608, i64 0)
+; CHECK:  call void @llvm.aarch64.stgp(ptr {{.*}}, i64 0, i64 3038287259199220266)
 ; CHECK:  ret void
 
 define void @LargeAlloca() sanitize_memtag {
 entry:
   %x = alloca i32, i32 256, align 16
-  %0 = bitcast i32* %x to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 256, i1 false)
-  call void @use(i8* nonnull %0)
+  call void @llvm.memset.p0.i64(ptr nonnull align 16 %x, i8 42, i64 256, i1 false)
+  call void @use(ptr nonnull %x)
   ret void
 }
 
 ; CHECK-LABEL: define void @LargeAlloca(
-; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 1024)
-; CHECK:  call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 42, i64 256,
+; CHECK:  call void @llvm.aarch64.settag(ptr {{.*}}, i64 1024)
+; CHECK:  call void @llvm.memset.p0.i64(ptr {{.*}}, i8 42, i64 256,
 ; CHECK:  ret void

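All of the stack-tagging updates above follow one pattern: the helper bitcast from the alloca's typed pointer to i8* disappears, the alloca is passed to the lifetime/memset intrinsics directly, and the intrinsic mangling keeps only the address space (.p0 rather than .p0i8). A minimal before/after sketch of that pattern (value names here are illustrative, not copied from any one test):

  ; typed pointers
  %x = alloca i32, align 4
  %0 = bitcast i32* %x to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)

  ; opaque pointers
  %x = alloca i32, align 4
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x)
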
diff  --git a/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
index 75e128748eac8..e3d368b7b85c6 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector-to-predicate-store.ll
@@ -2,77 +2,74 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
-define void @pred_store_v2i8(<vscale x 16 x i1> %pred, <2 x i8>* %addr) #0 {
+define void @pred_store_v2i8(<vscale x 16 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: @pred_store_v2i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    store <vscale x 16 x i1> %pred, ptr %addr
 ; CHECK-NEXT:    ret void
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <2 x i8> %extract, <2 x i8>* %addr, align 4
+  store <2 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
-define void @pred_store_v4i8(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
+define void @pred_store_v4i8(<vscale x 16 x i1> %pred, ptr %addr) #1 {
 ; CHECK-LABEL: @pred_store_v4i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    store <vscale x 16 x i1> %pred, ptr %addr
 ; CHECK-NEXT:    ret void
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <4 x i8> %extract, <4 x i8>* %addr, align 4
+  store <4 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
-define void @pred_store_v8i8(<vscale x 16 x i1> %pred, <8 x i8>* %addr) #2 {
+define void @pred_store_v8i8(<vscale x 16 x i1> %pred, ptr %addr) #2 {
 ; CHECK-LABEL: @pred_store_v8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    store <vscale x 16 x i1> %pred, ptr %addr
 ; CHECK-NEXT:    ret void
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <8 x i8> %extract, <8 x i8>* %addr, align 4
+  store <8 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
 
 ; Check that too small of a vscale prevents optimization
-define void @pred_store_neg1(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #0 {
+define void @pred_store_neg1(<vscale x 16 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: @pred_store_neg1(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <4 x i8> %extract, <4 x i8>* %addr, align 4
+  store <4 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
 ; Check that too large of a vscale prevents optimization
-define void @pred_store_neg2(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #2 {
+define void @pred_store_neg2(<vscale x 16 x i1> %pred, ptr %addr) #2 {
 ; CHECK-LABEL: @pred_store_neg2(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <4 x i8> %extract, <4 x i8>* %addr, align 4
+  store <4 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
 ; Check that a non-zero index prevents optimization
-define void @pred_store_neg3(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #1 {
+define void @pred_store_neg3(<vscale x 16 x i1> %pred, ptr %addr) #1 {
 ; CHECK-LABEL: @pred_store_neg3(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 4)
-  store <4 x i8> %extract, <4 x i8>* %addr, align 4
+  store <4 x i8> %extract, ptr %addr, align 4
   ret void
 }
 
 ; Check that differing vscale min/max prevents optimization
-define void @pred_store_neg4(<vscale x 16 x i1> %pred, <4 x i8>* %addr) #3 {
+define void @pred_store_neg4(<vscale x 16 x i1> %pred, ptr %addr) #3 {
 ; CHECK-LABEL: @pred_store_neg4(
 ; CHECK:         call <4 x i8> @llvm.vector.extract
   %bitcast = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
   %extract = tail call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bitcast, i64 0)
-  store <4 x i8> %extract, <4 x i8>* %addr, align 4
+  store <4 x i8> %extract, ptr %addr, align 4
   ret void
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
index e676708039e26..b906de7ed9f74 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll
@@ -2,50 +2,46 @@
 
 target triple = "aarch64-unknown-linux-gnu"
 
-define <vscale x 16 x i1> @pred_load_v2i8(<2 x i8>* %addr) #0 {
+define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
 ; CHECK-LABEL: @pred_load_v2i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
 ; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
-  %load = load <2 x i8>, <2 x i8>* %addr, align 4
+  %load = load <2 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
-define <vscale x 16 x i1> @pred_load_v4i8(<4 x i8>* %addr) #1 {
+define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
 ; CHECK-LABEL: @pred_load_v4i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
 ; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
-define <vscale x 16 x i1> @pred_load_v8i8(<8 x i8>* %addr) #2 {
+define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
 ; CHECK-LABEL: @pred_load_v8i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
 ; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
-  %load = load <8 x i8>, <8 x i8>* %addr, align 4
+  %load = load <8 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
 ; Ensure the insertion point is at the load
-define <vscale x 16 x i1> @pred_load_insertion_point(<2 x i8>* %addr) #0 {
+define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
 ; CHECK-LABEL: @pred_load_insertion_point(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i8>* %addr to <vscale x 16 x i1>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
 ; CHECK-NEXT:    br label %bb1
 ; CHECK:       bb1:
 ; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 entry:
-  %load = load <2 x i8>, <2 x i8>* %addr, align 4
+  %load = load <2 x i8>, ptr %addr, align 4
   br label %bb1
 
 bb1:
@@ -55,50 +51,50 @@ bb1:
 }
 
 ; Check that too small of a vscale prevents optimization
-define <vscale x 16 x i1> @pred_load_neg1(<4 x i8>* %addr) #0 {
+define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
 ; CHECK-LABEL: @pred_load_neg1(
 ; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
 ; Check that too large of a vscale prevents optimization
-define <vscale x 16 x i1> @pred_load_neg2(<4 x i8>* %addr) #2 {
+define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
 ; CHECK-LABEL: @pred_load_neg2(
 ; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
 ; Check that a non-zero index prevents optimization
-define <vscale x 16 x i1> @pred_load_neg3(<4 x i8>* %addr) #1 {
+define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
 ; CHECK-LABEL: @pred_load_neg3(
 ; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
 ; Check that differing vscale min/max prevents optimization
-define <vscale x 16 x i1> @pred_load_neg4(<4 x i8>* %addr) #3 {
+define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
 ; CHECK-LABEL: @pred_load_neg4(
 ; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret
 }
 
 ; Check that insertion into a non-undef vector prevents optimization
-define <vscale x 16 x i1> @pred_load_neg5(<4 x i8>* %addr, <vscale x 2 x i8> %passthru) #1 {
+define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
 ; CHECK-LABEL: @pred_load_neg5(
 ; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
-  %load = load <4 x i8>, <4 x i8>* %addr, align 4
+  %load = load <4 x i8>, ptr %addr, align 4
   %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
   %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
   ret <vscale x 16 x i1> %ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
index b55e23cd0613a..585c5d7a2472e 100644
--- a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll
@@ -6,10 +6,10 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linux-gnu"
 
-; These tests check that the IR coming out of LSR does not cast input/output pointer from i16* to i8* type.
+; These tests check that the IR coming out of LSR does not introduce unnecessary casts of the input/output pointers.
 ; And scaled-index addressing mode is leveraged in the generated assembly, i.e. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].
 
-define void @ld_st_nxv8i16(i16* %in, i16* %out) {
+define void @ld_st_nxv8i16(ptr %in, ptr %out) {
 ; IR-LABEL: @ld_st_nxv8i16(
 ; IR-NEXT:  entry:
 ; IR-NEXT:    br label [[LOOP_PH:%.*]]
@@ -21,13 +21,13 @@ define void @ld_st_nxv8i16(i16* %in, i16* %out) {
 ; IR-NEXT:    br label [[LOOP:%.*]]
 ; IR:       loop:
 ; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
-; IR-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]]
-; IR-NEXT:    [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to <vscale x 8 x i16>*
-; IR-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]]
-; IR-NEXT:    [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to <vscale x 8 x i16>*
-; IR-NEXT:    [[VAL:%.*]] = load <vscale x 8 x i16>, <vscale x 8 x i16>* [[SCEVGEP23]], align 16
+; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
+; IR-NEXT:    [[VAL:%.*]] = load <vscale x 8 x i16>, ptr [[UGLYGEP1]], align 16
 ; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
-; IR-NEXT:    store <vscale x 8 x i16> [[ADDP_VEC]], <vscale x 8 x i16>* [[SCEVGEP1]], align 16
+; IR-NEXT:    store <vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], align 16
 ; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
 ; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
 ; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
@@ -64,13 +64,11 @@ loop.ph:
 
 loop:                                             ; preds = %loop, %loop.ph
   %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
-  %ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar
-  %ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar
-  %in.ptrcast = bitcast i16* %ptr.in to <vscale x 8 x i16>*
-  %out.ptrcast = bitcast i16* %ptr.out to <vscale x 8 x i16>*
-  %val = load <vscale x 8 x i16>, <vscale x 8 x i16>* %in.ptrcast, align 16
+  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
+  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
+  %val = load <vscale x 8 x i16>, ptr %ptr.in, align 16
   %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
-  store <vscale x 8 x i16> %addp_vec, <vscale x 8 x i16>* %out.ptrcast, align 16
+  store <vscale x 8 x i16> %addp_vec, ptr %ptr.out, align 16
   %indvar.next = add nsw i64 %indvar, %scaled_vf
   %exit.cond = icmp eq i64 %indvar.next, 1024
   br i1 %exit.cond, label %loop.exit, label %loop
@@ -82,7 +80,7 @@ exit:
   ret void
 }
 
-define void @masked_ld_st_nxv8i16(i16* %in, i16* %out, i64 %n) {
+define void @masked_ld_st_nxv8i16(ptr %in, ptr %out, i64 %n) {
 ; IR-LABEL: @masked_ld_st_nxv8i16(
 ; IR-NEXT:  entry:
 ; IR-NEXT:    br label [[LOOP_PH:%.*]]
@@ -96,13 +94,13 @@ define void @masked_ld_st_nxv8i16(i16* %in, i16* %out, i64 %n) {
 ; IR-NEXT:    br label [[LOOP:%.*]]
 ; IR:       loop:
 ; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
-; IR-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i16, i16* [[IN:%.*]], i64 [[INDVAR]]
-; IR-NEXT:    [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to <vscale x 8 x i16>*
-; IR-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[OUT:%.*]], i64 [[INDVAR]]
-; IR-NEXT:    [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to <vscale x 8 x i16>*
-; IR-NEXT:    [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[SCEVGEP23]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
+; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
+; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
+; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
+; IR-NEXT:    [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[UGLYGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
 ; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
-; IR-NEXT:    call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[ADDP_VEC]], <vscale x 8 x i16>* [[SCEVGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
+; IR-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
 ; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
 ; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]]
 ; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
@@ -141,13 +139,11 @@ loop.ph:
 
 loop:                                             ; preds = %loop, %loop.ph
   %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
-  %ptr.in = getelementptr inbounds i16, i16* %in, i64 %indvar
-  %ptr.out = getelementptr inbounds i16, i16* %out, i64 %indvar
-  %in.ptrcast = bitcast i16* %ptr.in to <vscale x 8 x i16>*
-  %out.ptrcast = bitcast i16* %ptr.out to <vscale x 8 x i16>*
-  %val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* %in.ptrcast, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
+  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
+  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
+  %val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %ptr.in, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
   %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
-  call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %addp_vec, <vscale x 8 x i16>* %out.ptrcast, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
+  call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %addp_vec, ptr %ptr.out, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
   %indvar.next = add nsw i64 %indvar, %scaled_vf
   %exit.cond = icmp eq i64 %indvar.next, %n
   br i1 %exit.cond, label %loop.exit, label %loop
@@ -161,6 +157,6 @@ exit:
 
 declare i64 @llvm.vscale.i64()
 
-declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
 
-declare void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32 immarg, <vscale x 8 x i1>)
+declare void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, i32 immarg, <vscale x 8 x i1>)

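The updated CHECK lines in the LSR test above also show the shape LSR's output takes with opaque pointers: instead of an element-typed GEP followed by a bitcast to the vector pointer type, the index is scaled explicitly and applied through a byte-typed GEP. A sketch of the two forms (names illustrative, mirroring the CHECK lines rather than copied from them):

  ; typed pointers: element GEP + bitcast
  %scevgep = getelementptr i16, i16* %in, i64 %indvar
  %cast = bitcast i16* %scevgep to <vscale x 8 x i16>*
  %val = load <vscale x 8 x i16>, <vscale x 8 x i16>* %cast, align 16

  ; opaque pointers: explicit scaling + byte GEP
  %off = shl i64 %indvar, 1
  %gep = getelementptr i8, ptr %in, i64 %off
  %val = load <vscale x 8 x i16>, ptr %gep, align 16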

        

