[llvm] 2fab927 - [LoopVectorize] Convert some tests to opaque pointers (NFC)

Wed Jan 4 08:27:11 PST 2023

Author: Nikita Popov
Date: 2023-01-04T17:25:42+01:00
New Revision: 2fab927546b34f5af7770541a9bbb974d9818c5c

URL: https://github.com/llvm/llvm-project/commit/2fab927546b34f5af7770541a9bbb974d9818c5c
DIFF: https://github.com/llvm/llvm-project/commit/2fab927546b34f5af7770541a9bbb974d9818c5c.diff

LOG: [LoopVectorize] Convert some tests to opaque pointers (NFC)

Check lines for some of these tests were regenerated. The difference
is that, with opaque pointers, SCEVExpander always emits i8 GEPs,
making the address calculation explicit (e.g. a separate shl of the
index before the GEP). This is a known problem that will be solved
in the long term by making all address calculations explicit.

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
    llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
    llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll
    llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll
    llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll
    llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll
    llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
    llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
    llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
    llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
    llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/pr38110.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
    llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
    llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
    llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
    llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
    llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
    llvm/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
    llvm/test/Transforms/LoopVectorize/ee-crash.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
    llvm/test/Transforms/LoopVectorize/global_alias.ll
    llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
    llvm/test/Transforms/LoopVectorize/nsw-crash.ll
    llvm/test/Transforms/LoopVectorize/pointer-induction.ll
    llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
    llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
    llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
    llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
    llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
index 7b501486ace8e..cb9ba1b088cb6 100644

--- a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
@@ -11,8 +11,8 @@ target triple = "aarch64--linux-gnueabi"
 ;   }
 
 ; CHECK-LABEL: @ind_plus2(
-; CHECK: load <4 x i32>, <4 x i32>*
-; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, ptr
+; CHECK: load <4 x i32>, ptr
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add <4 x i32>
@@ -21,21 +21,21 @@ target triple = "aarch64--linux-gnueabi"
 ; CHECK: icmp eq i64 %index.next, 512
 
 ; FORCE-VEC-LABEL: @ind_plus2(
-; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, ptr
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add <2 x i32>
 ; FORCE-VEC: %index.next = add nuw i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 512
-define i32 @ind_plus2(i32* %A) {
+define i32 @ind_plus2(ptr %A) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
-  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
+  %A.addr = phi ptr [ %A, %entry ], [ %inc.ptr, %for.body ]
   %i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32, i32* %A.addr, align 4
+  %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1
+  %0 = load i32, ptr %A.addr, align 4
   %mul = mul nsw i32 %0, %i
   %add = add nsw i32 %mul, %sum
   %add1 = add nsw i32 %i, 2
@@ -55,8 +55,8 @@ for.end:                                          ; preds = %for.body
 ;   }
 
 ; CHECK-LABEL: @ind_minus2(
-; CHECK: load <4 x i32>, <4 x i32>*
-; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, ptr
+; CHECK: load <4 x i32>, ptr
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add <4 x i32>
@@ -65,21 +65,21 @@ for.end:                                          ; preds = %for.body
 ; CHECK: icmp eq i64 %index.next, 512
 
 ; FORCE-VEC-LABEL: @ind_minus2(
-; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, ptr
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add <2 x i32>
 ; FORCE-VEC: %index.next = add nuw i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 512
-define i32 @ind_minus2(i32* %A) {
+define i32 @ind_minus2(ptr %A) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
-  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
+  %A.addr = phi ptr [ %A, %entry ], [ %inc.ptr, %for.body ]
   %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32, i32* %A.addr, align 4
+  %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1
+  %0 = load i32, ptr %A.addr, align 4
   %mul = mul nsw i32 %0, %i
   %add = add nsw i32 %mul, %sum
   %sub = add nsw i32 %i, -2
@@ -123,18 +123,18 @@ for.end:                                          ; preds = %for.body
 ; FORCE-VEC: add <2 x i32>
 ; FORCE-VEC: %index.next = add nuw i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 1024
-define i32 @ptr_ind_plus2(i32* %A) {
+define i32 @ptr_ind_plus2(ptr %A) {
 entry:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
-  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ]
+  %A.addr = phi ptr [ %A, %entry ], [ %inc.ptr1, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32, i32* %A.addr, align 4
-  %inc.ptr1 = getelementptr inbounds i32, i32* %A.addr, i64 2
-  %1 = load i32, i32* %inc.ptr, align 4
+  %inc.ptr = getelementptr inbounds i32, ptr %A.addr, i64 1
+  %0 = load i32, ptr %A.addr, align 4
+  %inc.ptr1 = getelementptr inbounds i32, ptr %A.addr, i64 2
+  %1 = load i32, ptr %inc.ptr, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %sum
   %inc = add nsw i32 %i, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
index 99486a1851743..38cab8a025779 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
 ;   return a;
 ; }
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
+define i32 @PR33613(ptr %b, double %j, i32 %d) #0 {
 ; CHECK-VF4UF2-LABEL: @PR33613
 ; CHECK-VF4UF2: vector.body
 ; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -35,18 +35,18 @@ for.cond.cleanup:
   ret i32 %a.1.lcssa
 
 for.body:
-  %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
+  %b.addr.012 = phi ptr [ %b, %entry ], [ %add.ptr, %for.body ]
   %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
   %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
   %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %b.addr.012, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
   %mul = fmul double %j.addr.09, %0
   %tobool = fcmp une double %mul, 0.000000e+00
   %inc = zext i1 %tobool to i32
   %a.1 = add nsw i32 %a.010, %inc
   %inc1 = add nuw nsw i32 %i.011, 1
-  %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
+  %add.ptr = getelementptr inbounds double, ptr %b.addr.012, i64 25
   %exitcond = icmp eq i32 %inc1, 10240
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
 }
@@ -66,32 +66,31 @@ for.body:
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
+define void @PR34711(ptr %a, ptr %b, ptr %c, i64 %n) #0 {
 ; CHECK-VF4UF1-LABEL: @PR34711
 ; CHECK-VF4UF1: vector.body
 ; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
-; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
+; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i16> poison)
 ; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.experimental.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1)
 ; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>
 ; CHECK-VF4UF1-NEXT: %[[SXT2:.*]] = sext <vscale x 4 x i16> %[[MGATHER]] to <vscale x 4 x i32>
 ; CHECK-VF4UF1-NEXT: mul nsw <vscale x 4 x i32> %[[SXT2]], %[[SXT1]]
 entry:
-  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
-  %.pre = load i16, i16* %pre.index
+  %.pre = load i16, ptr %a
   br label %for.body
 
 for.body:
   %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
-  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
+  %arraycidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  %cur.index = getelementptr inbounds [2 x i16], ptr %a, i64 %indvars.iv, i64 1
+  store i32 7, ptr %arraycidx   ; 1st instruction, to be widened.
   %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
-  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
+  %1 = load i16, ptr %cur.index ; 3rd, first-order-recurring load not widened.
   %conv3 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv3, %conv
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx5
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx5
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
index e2c696f9c18ad..1c58fb4fa1986 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
@@ -12,7 +12,7 @@ target triple = "aarch64--linux-gnu"
 ; CHECK-COST: Found an estimated cost of 1 for VF 4 For instruction:   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
 ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction:   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
 
-define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* nocapture noalias %pDst, i32 %blockSize) #0 {
+define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @saddsat(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -25,10 +25,12 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], -16
-; CHECK-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET]], i64 0
@@ -36,20 +38,18 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 8
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i16>, <8 x i16>* [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]])
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]])
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[NEXT_GEP6]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* [[TMP8]], align 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i16, i16* [[NEXT_GEP6]], i64 8
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP7]], <8 x i16>* [[TMP10]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP6]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD8]], <8 x i16> [[BROADCAST_SPLAT10]])
+; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr [[NEXT_GEP6]], align 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP6]], i64 8
+; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr [[TMP10]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -58,18 +58,18 @@ define void @saddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i16* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i16* [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi i16* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRC_ADDR_08]], i64 1
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[PSRC_ADDR_08]], align 2
+; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRC_ADDR_08]], i64 1
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[PSRC_ADDR_08]], align 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = tail call i16 @llvm.sadd.sat.i16(i16 [[TMP12]], i16 [[OFFSET]])
-; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i16, i16* [[PDST_ADDR_07]], i64 1
-; CHECK-NEXT:    store i16 [[TMP13]], i16* [[PDST_ADDR_07]], align 2
+; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i16, ptr [[PDST_ADDR_07]], i64 1
+; CHECK-NEXT:    store i16 [[TMP13]], ptr [[PDST_ADDR_07]], align 2
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_09]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -82,13 +82,13 @@ entry:
 
 while.body:                                       ; preds = %entry, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pSrc.addr.08 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
-  %pDst.addr.07 = phi i16* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.08, i32 1
-  %0 = load i16, i16* %pSrc.addr.08, align 2
+  %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
+  %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1
+  %0 = load i16, ptr %pSrc.addr.08, align 2
   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
-  %incdec.ptr3 = getelementptr inbounds i16, i16* %pDst.addr.07, i32 1
-  store i16 %1, i16* %pDst.addr.07, align 2
+  %incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1
+  store i16 %1, ptr %pDst.addr.07, align 2
   %dec = add i32 %blkCnt.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body
@@ -104,7 +104,7 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction:   %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset)
 ; CHECK-COST: Found an estimated cost of 1 for VF 16 For instruction:   %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset)
 
-define void @umin(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocapture noalias %pDst, i32 %blockSize) #0 {
+define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @umin(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -127,73 +127,67 @@ define void @umin(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocaptur
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 16
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]])
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]])
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[NEXT_GEP3]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* [[TMP8]], align 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i64 16
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP7]], <16 x i8>* [[TMP10]], align 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD5]], <16 x i8> [[BROADCAST_SPLAT7]])
+; CHECK-NEXT:    store <16 x i8> [[TMP4]], ptr [[NEXT_GEP3]], align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 16
+; CHECK-NEXT:    store <16 x i8> [[TMP5]], ptr [[TMP6]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[IND_END16:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END13:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[CAST_VTC10:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT:    [[IND_END11:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC10]]
+; CHECK-NEXT:    [[IND_END20:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END17:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[DOTCAST13:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    [[IND_END14:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST13]]
 ; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[TMP2]], 24
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
 ; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[N_VEC9:%.*]] = and i64 [[TMP2]], -8
-; CHECK-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC9]] to i32
-; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]]
-; CHECK-NEXT:    [[IND_END12:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC9]]
-; CHECK-NEXT:    [[IND_END15:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC9]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT23:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT24:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT23]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[N_VEC11:%.*]] = and i64 [[TMP2]], -8
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC11]] to i32
+; CHECK-NEXT:    [[IND_END12:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
+; CHECK-NEXT:    [[IND_END16:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC11]]
+; CHECK-NEXT:    [[IND_END19:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC11]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i8> poison, i8 [[OFFSET]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i8> [[BROADCAST_SPLATINSERT27]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP20:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[INDEX19]]
-; CHECK-NEXT:    [[NEXT_GEP21:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[INDEX19]]
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[NEXT_GEP20]] to <8 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD22:%.*]] = load <8 x i8>, <8 x i8>* [[TMP12]], align 2
-; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD22]], <8 x i8> [[BROADCAST_SPLAT24]])
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[NEXT_GEP21]] to <8 x i8>*
-; CHECK-NEXT:    store <8 x i8> [[TMP13]], <8 x i8>* [[TMP14]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT25]] = add nuw i64 [[INDEX19]], 8
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC9]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[INDEX23:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP24:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX23]]
+; CHECK-NEXT:    [[NEXT_GEP25:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX23]]
+; CHECK-NEXT:    [[WIDE_LOAD26:%.*]] = load <8 x i8>, ptr [[NEXT_GEP24]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[WIDE_LOAD26]], <8 x i8> [[BROADCAST_SPLAT28]])
+; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr [[NEXT_GEP25]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT29]] = add nuw i64 [[INDEX23]], 8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC11]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N18:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC9]]
-; CHECK-NEXT:    br i1 [[CMP_N18]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N22:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC11]]
+; CHECK-NEXT:    br i1 [[CMP_N22]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL14:%.*]] = phi i8* [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL17:%.*]] = phi i8* [ [[IND_END15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END16]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL15:%.*]] = phi i32 [ [[IND_END12]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL18:%.*]] = phi ptr [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL21:%.*]] = phi ptr [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL17]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PSRC_ADDR_08]], i64 1
-; CHECK-NEXT:    [[TMP16:%.*]] = load i8, i8* [[PSRC_ADDR_08]], align 2
-; CHECK-NEXT:    [[TMP17:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TMP16]], i8 [[OFFSET]])
-; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PDST_ADDR_07]], i64 1
-; CHECK-NEXT:    store i8 [[TMP17]], i8* [[PDST_ADDR_07]], align 2
+; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL21]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_08]], i64 1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[PSRC_ADDR_08]], align 2
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TMP10]], i8 [[OFFSET]])
+; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PDST_ADDR_07]], i64 1
+; CHECK-NEXT:    store i8 [[TMP11]], ptr [[PDST_ADDR_07]], align 2
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_09]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -206,13 +200,13 @@ entry:
 
 while.body:                                       ; preds = %entry, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pSrc.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
-  %pDst.addr.07 = phi i8* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %pSrc.addr.08, i32 1
-  %0 = load i8, i8* %pSrc.addr.08, align 2
+  %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
+  %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.08, i32 1
+  %0 = load i8, ptr %pSrc.addr.08, align 2
   %1 = tail call i8 @llvm.umin.i8(i8 %0, i8 %offset)
-  %incdec.ptr3 = getelementptr inbounds i8, i8* %pDst.addr.07, i32 1
-  store i8 %1, i8* %pDst.addr.07, align 2
+  %incdec.ptr3 = getelementptr inbounds i8, ptr %pDst.addr.07, i32 1
+  store i8 %1, ptr %pDst.addr.07, align 2
   %dec = add i32 %blkCnt.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body
@@ -224,3 +218,5 @@ while.end:                                        ; preds = %while.body, %entry
 declare i16 @llvm.sadd.sat.i16(i16, i16)
 declare i8 @llvm.umin.i8(i8, i8)
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COST: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll
index a3d053082e1f8..d40115b81d6d9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll
@@ -16,7 +16,7 @@ target triple = "aarch64-none-unknown-eabi"
 ; COST: LV: Found an estimated cost of 3000000 for VF 16 For instruction:   %0 = load
 ; COST: LV: Selecting VF: 1.
 
-define i32 @test(i8* nocapture noundef readonly %pInVec, i8* nocapture noundef readonly %pInA1, i8* nocapture noundef readonly %pInA2, i8* nocapture noundef readonly %pInA3, i8* nocapture noundef readonly %pInA4, i32 noundef %numCols) {
+define i32 @test(ptr nocapture noundef readonly %pInVec, ptr nocapture noundef readonly %pInA1, ptr nocapture noundef readonly %pInA2, ptr nocapture noundef readonly %pInA3, ptr nocapture noundef readonly %pInA4, i32 noundef %numCols) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[NUMCOLS:%.*]], 3
@@ -25,36 +25,36 @@ define i32 @test(i8* nocapture noundef readonly %pInVec, i8* nocapture noundef r
 ; CHECK:       while.body.preheader:
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[PINVEC_ADDR_042:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PINVEC:%.*]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PINVEC_ADDR_042:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PINVEC:%.*]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[SUM4_041:%.*]] = phi i32 [ [[ADD14:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[SUM3_040:%.*]] = phi i32 [ [[ADD10:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[SUM2_039:%.*]] = phi i32 [ [[ADD6:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[SUM1_038:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[COLCNT_037:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[AND]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[PINA1_ADDR_036:%.*]] = phi i8* [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PINA1:%.*]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[PINA4_ADDR_035:%.*]] = phi i8* [ [[INCDEC_PTR11:%.*]], [[WHILE_BODY]] ], [ [[PINA4:%.*]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[PINA3_ADDR_034:%.*]] = phi i8* [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[PINA3:%.*]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[PINA2_ADDR_033:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[PINA2:%.*]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PINVEC_ADDR_042]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[PINVEC_ADDR_042]], align 1
+; CHECK-NEXT:    [[PINA1_ADDR_036:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PINA1:%.*]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PINA4_ADDR_035:%.*]] = phi ptr [ [[INCDEC_PTR11:%.*]], [[WHILE_BODY]] ], [ [[PINA4:%.*]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PINA3_ADDR_034:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[PINA3:%.*]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[PINA2_ADDR_033:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[PINA2:%.*]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PINVEC_ADDR_042]], i64 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PINVEC_ADDR_042]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
-; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds i8, i8* [[PINA1_ADDR_036]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[PINA1_ADDR_036]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PINA1_ADDR_036]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[PINA1_ADDR_036]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = sext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[ADD]] = add nsw i32 [[MUL]], [[SUM1_038]]
-; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PINA2_ADDR_033]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[PINA2_ADDR_033]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PINA2_ADDR_033]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[PINA2_ADDR_033]], align 1
 ; CHECK-NEXT:    [[CONV4:%.*]] = sext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[CONV4]], [[CONV]]
 ; CHECK-NEXT:    [[ADD6]] = add nsw i32 [[MUL5]], [[SUM2_039]]
-; CHECK-NEXT:    [[INCDEC_PTR7]] = getelementptr inbounds i8, i8* [[PINA3_ADDR_034]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[PINA3_ADDR_034]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR7]] = getelementptr inbounds i8, ptr [[PINA3_ADDR_034]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[PINA3_ADDR_034]], align 1
 ; CHECK-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP3]] to i32
 ; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i32 [[CONV8]], [[CONV]]
 ; CHECK-NEXT:    [[ADD10]] = add nsw i32 [[MUL9]], [[SUM3_040]]
-; CHECK-NEXT:    [[INCDEC_PTR11]] = getelementptr inbounds i8, i8* [[PINA4_ADDR_035]], i64 1
-; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[PINA4_ADDR_035]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR11]] = getelementptr inbounds i8, ptr [[PINA4_ADDR_035]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[PINA4_ADDR_035]], align 1
 ; CHECK-NEXT:    [[CONV12:%.*]] = sext i8 [[TMP4]] to i32
 ; CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[CONV12]], [[CONV]]
 ; CHECK-NEXT:    [[ADD14]] = add nsw i32 [[MUL13]], [[SUM4_041]]
@@ -80,36 +80,36 @@ entry:
   br i1 %cmp.not32, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
-  %pInVec.addr.042 = phi i8* [ %incdec.ptr, %while.body ], [ %pInVec, %entry ]
+  %pInVec.addr.042 = phi ptr [ %incdec.ptr, %while.body ], [ %pInVec, %entry ]
   %sum4.041 = phi i32 [ %add14, %while.body ], [ 0, %entry ]
   %sum3.040 = phi i32 [ %add10, %while.body ], [ 0, %entry ]
   %sum2.039 = phi i32 [ %add6, %while.body ], [ 0, %entry ]
   %sum1.038 = phi i32 [ %add, %while.body ], [ 0, %entry ]
   %colCnt.037 = phi i32 [ %dec, %while.body ], [ %and, %entry ]
-  %pInA1.addr.036 = phi i8* [ %incdec.ptr1, %while.body ], [ %pInA1, %entry ]
-  %pInA4.addr.035 = phi i8* [ %incdec.ptr11, %while.body ], [ %pInA4, %entry ]
-  %pInA3.addr.034 = phi i8* [ %incdec.ptr7, %while.body ], [ %pInA3, %entry ]
-  %pInA2.addr.033 = phi i8* [ %incdec.ptr3, %while.body ], [ %pInA2, %entry ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %pInVec.addr.042, i64 1
-  %0 = load i8, i8* %pInVec.addr.042, align 1
+  %pInA1.addr.036 = phi ptr [ %incdec.ptr1, %while.body ], [ %pInA1, %entry ]
+  %pInA4.addr.035 = phi ptr [ %incdec.ptr11, %while.body ], [ %pInA4, %entry ]
+  %pInA3.addr.034 = phi ptr [ %incdec.ptr7, %while.body ], [ %pInA3, %entry ]
+  %pInA2.addr.033 = phi ptr [ %incdec.ptr3, %while.body ], [ %pInA2, %entry ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %pInVec.addr.042, i64 1
+  %0 = load i8, ptr %pInVec.addr.042, align 1
   %conv = sext i8 %0 to i32
-  %incdec.ptr1 = getelementptr inbounds i8, i8* %pInA1.addr.036, i64 1
-  %1 = load i8, i8* %pInA1.addr.036, align 1
+  %incdec.ptr1 = getelementptr inbounds i8, ptr %pInA1.addr.036, i64 1
+  %1 = load i8, ptr %pInA1.addr.036, align 1
   %conv2 = sext i8 %1 to i32
   %mul = mul nsw i32 %conv2, %conv
   %add = add nsw i32 %mul, %sum1.038
-  %incdec.ptr3 = getelementptr inbounds i8, i8* %pInA2.addr.033, i64 1
-  %2 = load i8, i8* %pInA2.addr.033, align 1
+  %incdec.ptr3 = getelementptr inbounds i8, ptr %pInA2.addr.033, i64 1
+  %2 = load i8, ptr %pInA2.addr.033, align 1
   %conv4 = sext i8 %2 to i32
   %mul5 = mul nsw i32 %conv4, %conv
   %add6 = add nsw i32 %mul5, %sum2.039
-  %incdec.ptr7 = getelementptr inbounds i8, i8* %pInA3.addr.034, i64 1
-  %3 = load i8, i8* %pInA3.addr.034, align 1
+  %incdec.ptr7 = getelementptr inbounds i8, ptr %pInA3.addr.034, i64 1
+  %3 = load i8, ptr %pInA3.addr.034, align 1
   %conv8 = sext i8 %3 to i32
   %mul9 = mul nsw i32 %conv8, %conv
   %add10 = add nsw i32 %mul9, %sum3.040
-  %incdec.ptr11 = getelementptr inbounds i8, i8* %pInA4.addr.035, i64 1
-  %4 = load i8, i8* %pInA4.addr.035, align 1
+  %incdec.ptr11 = getelementptr inbounds i8, ptr %pInA4.addr.035, i64 1
+  %4 = load i8, ptr %pInA4.addr.035, align 1
   %conv12 = sext i8 %4 to i32
   %mul13 = mul nsw i32 %conv12, %conv
   %add14 = add nsw i32 %mul13, %sum4.041

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll
index 8f5d796667745..75f03c7b1a699 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/nontemporal-load-store.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphones -force-vector-width=4 -force-vector-interleave=1 %s -S | FileCheck %s
 
 ; Vectors with i4 elements may not legal with nontemporal stores.
-define void @test_i4_store(i4* %ddst) {
+define void @test_i4_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_i4_store(
 ; CHECK-NOT:   vector.body:
 ; CHECK:        ret void
@@ -11,9 +11,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i4* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i4, i4* %ddst.addr, i64 1
-  store i4 10, i4* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i4, ptr %ddst.addr, i64 1
+  store i4 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -22,7 +22,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i8_store(i8* %ddst) {
+define void @test_i8_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_i8_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x i8> {{.*}} !nontemporal !0
@@ -33,9 +33,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i8* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %ddst.addr, i64 1
-  store i8 10, i8* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %ddst.addr, i64 1
+  store i8 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -44,7 +44,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_half_store(half* %ddst) {
+define void @test_half_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_half_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x half> {{.*}} !nontemporal !0
@@ -55,9 +55,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi half* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds half, half* %ddst.addr, i64 1
-  store half 10.0, half* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds half, ptr %ddst.addr, i64 1
+  store half 10.0, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -66,7 +66,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i16_store(i16* %ddst) {
+define void @test_i16_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_i16_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x i16> {{.*}} !nontemporal !0
@@ -77,9 +77,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i16* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %ddst.addr, i64 1
-  store i16 10, i16* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %ddst.addr, i64 1
+  store i16 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -88,7 +88,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i32_store(i32* nocapture %ddst) {
+define void @test_i32_store(ptr nocapture %ddst) {
 ; CHECK-LABEL: define void @test_i32_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <16 x i32> {{.*}} !nontemporal !0
@@ -99,15 +99,15 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i32* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
-  %incdec.ptr = getelementptr inbounds i32, i32* %ddst.addr, i64 1
-  store i32 10, i32* %ddst.addr, align 4, !nontemporal !8
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %ddst.addr, i64 2
-  store i32 20, i32* %incdec.ptr, align 4, !nontemporal !8
-  %incdec.ptr2 = getelementptr inbounds i32, i32* %ddst.addr, i64 3
-  store i32 30, i32* %incdec.ptr1, align 4, !nontemporal !8
-  %incdec.ptr3 = getelementptr inbounds i32, i32* %ddst.addr, i64 4
-  store i32 40, i32* %incdec.ptr2, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32, ptr %ddst.addr, i64 1
+  store i32 10, ptr %ddst.addr, align 4, !nontemporal !8
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %ddst.addr, i64 2
+  store i32 20, ptr %incdec.ptr, align 4, !nontemporal !8
+  %incdec.ptr2 = getelementptr inbounds i32, ptr %ddst.addr, i64 3
+  store i32 30, ptr %incdec.ptr1, align 4, !nontemporal !8
+  %incdec.ptr3 = getelementptr inbounds i32, ptr %ddst.addr, i64 4
+  store i32 40, ptr %incdec.ptr2, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -116,7 +116,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i33_store(i33* nocapture %ddst) {
+define void @test_i33_store(ptr nocapture %ddst) {
 ; CHECK-LABEL: define void @test_i33_store(
 ; CHECK-NOT:   vector.body:
 ; CHECK:         ret
@@ -126,15 +126,15 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i33* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
-  %incdec.ptr = getelementptr inbounds i33, i33* %ddst.addr, i64 1
-  store i33 10, i33* %ddst.addr, align 4, !nontemporal !8
-  %incdec.ptr1 = getelementptr inbounds i33, i33* %ddst.addr, i64 2
-  store i33 20, i33* %incdec.ptr, align 4, !nontemporal !8
-  %incdec.ptr2 = getelementptr inbounds i33, i33* %ddst.addr, i64 3
-  store i33 30, i33* %incdec.ptr1, align 4, !nontemporal !8
-  %incdec.ptr3 = getelementptr inbounds i33, i33* %ddst.addr, i64 4
-  store i33 40, i33* %incdec.ptr2, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
+  %incdec.ptr = getelementptr inbounds i33, ptr %ddst.addr, i64 1
+  store i33 10, ptr %ddst.addr, align 4, !nontemporal !8
+  %incdec.ptr1 = getelementptr inbounds i33, ptr %ddst.addr, i64 2
+  store i33 20, ptr %incdec.ptr, align 4, !nontemporal !8
+  %incdec.ptr2 = getelementptr inbounds i33, ptr %ddst.addr, i64 3
+  store i33 30, ptr %incdec.ptr1, align 4, !nontemporal !8
+  %incdec.ptr3 = getelementptr inbounds i33, ptr %ddst.addr, i64 4
+  store i33 40, ptr %incdec.ptr2, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 3
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -143,7 +143,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i40_store(i40* nocapture %ddst) {
+define void @test_i40_store(ptr nocapture %ddst) {
 ; CHECK-LABEL: define void @test_i40_store(
 ; CHECK-NOT:   vector.body:
 ; CHECK:         ret
@@ -153,15 +153,15 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i40* [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
-  %incdec.ptr = getelementptr inbounds i40, i40* %ddst.addr, i64 1
-  store i40 10, i40* %ddst.addr, align 4, !nontemporal !8
-  %incdec.ptr1 = getelementptr inbounds i40, i40* %ddst.addr, i64 2
-  store i40 20, i40* %incdec.ptr, align 4, !nontemporal !8
-  %incdec.ptr2 = getelementptr inbounds i40, i40* %ddst.addr, i64 3
-  store i40 30, i40* %incdec.ptr1, align 4, !nontemporal !8
-  %incdec.ptr3 = getelementptr inbounds i40, i40* %ddst.addr, i64 4
-  store i40 40, i40* %incdec.ptr2, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
+  %incdec.ptr = getelementptr inbounds i40, ptr %ddst.addr, i64 1
+  store i40 10, ptr %ddst.addr, align 4, !nontemporal !8
+  %incdec.ptr1 = getelementptr inbounds i40, ptr %ddst.addr, i64 2
+  store i40 20, ptr %incdec.ptr, align 4, !nontemporal !8
+  %incdec.ptr2 = getelementptr inbounds i40, ptr %ddst.addr, i64 3
+  store i40 30, ptr %incdec.ptr1, align 4, !nontemporal !8
+  %incdec.ptr3 = getelementptr inbounds i40, ptr %ddst.addr, i64 4
+  store i40 40, ptr %incdec.ptr2, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 3
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -169,7 +169,7 @@ for.body:                                         ; preds = %entry, %for.body
 for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
-define void @test_i64_store(i64* nocapture %ddst) local_unnamed_addr #0 {
+define void @test_i64_store(ptr nocapture %ddst) local_unnamed_addr #0 {
 ; CHECK-LABEL: define void @test_i64_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x i64> {{.*}} !nontemporal !0
@@ -180,9 +180,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i64* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i64, i64* %ddst.addr, i64 1
-  store i64 10, i64* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i64, ptr %ddst.addr, i64 1
+  store i64 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -191,7 +191,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_double_store(double* %ddst) {
+define void @test_double_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_double_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x double> {{.*}} !nontemporal !0
@@ -202,9 +202,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi double* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds double, double* %ddst.addr, i64 1
-  store double 10.0, double* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds double, ptr %ddst.addr, i64 1
+  store double 10.0, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -213,7 +213,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i128_store(i128* %ddst) {
+define void @test_i128_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_i128_store(
 ; CHECK-LABEL: vector.body:
 ; CHECK:         store <4 x i128> {{.*}} !nontemporal !0
@@ -224,9 +224,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i128* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i128, i128* %ddst.addr, i64 1
-  store i128 10, i128* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i128, ptr %ddst.addr, i64 1
+  store i128 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -235,7 +235,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define void @test_i256_store(i256* %ddst) {
+define void @test_i256_store(ptr %ddst) {
 ; CHECK-LABEL: define void @test_i256_store(
 ; CHECK-NOT:   vector.body:
 ; CHECK:        ret void
@@ -245,9 +245,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %ddst.addr = phi i256* [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
-  %incdec.ptr = getelementptr inbounds i256, i256* %ddst.addr, i64 1
-  store i256 10, i256* %ddst.addr, align 4, !nontemporal !8
+  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i256, ptr %ddst.addr, i64 1
+  store i256 10, ptr %ddst.addr, align 4, !nontemporal !8
   %add = add nuw nsw i32 %i, 4
   %cmp = icmp ult i32 %i, 4092
   br i1 %cmp, label %for.body, label %for.cond.cleanup
@@ -256,7 +256,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-define i4 @test_i4_load(i4* %ddst) {
+define i4 @test_i4_load(ptr %ddst) {
 ; CHECK-LABEL: define i4 @test_i4_load
 ; CHECK-NOT: vector.body:
 ; CHECk: ret i4 %{{.*}}
@@ -267,8 +267,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i4, i4* %ddst, i64 %indvars.iv
-  %l = load i4, i4* %arrayidx, align 1, !nontemporal !8
+  %arrayidx = getelementptr inbounds i4, ptr %ddst, i64 %indvars.iv
+  %l = load i4, ptr %arrayidx, align 1, !nontemporal !8
   %add = add i4 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -278,10 +278,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i4 %add
 }
 
-define i8 @test_load_i8(i8* %ddst) {
+define i8 @test_load_i8(ptr %ddst) {
 ; CHECK-LABEL: @test_load_i8(
 ; CHECK:   vector.body:
-; CHECK: load <4 x i8>, <4 x i8>* {{.*}}, align 1, !nontemporal !0
+; CHECK: load <4 x i8>, ptr {{.*}}, align 1, !nontemporal !0
 ; CHECk: ret i8 %{{.*}}
 ;
 entry:
@@ -290,8 +290,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i8, i8* %ddst, i64 %indvars.iv
-  %l = load i8, i8* %arrayidx, align 1, !nontemporal !8
+  %arrayidx = getelementptr inbounds i8, ptr %ddst, i64 %indvars.iv
+  %l = load i8, ptr %arrayidx, align 1, !nontemporal !8
   %add = add i8 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -301,10 +301,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i8 %add
 }
 
-define half @test_half_load(half* %ddst) {
+define half @test_half_load(ptr %ddst) {
 ; CHECK-LABEL: @test_half_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x half>, <4 x half>* {{.*}}, align 2, !nontemporal !0
+; CHECK: load <4 x half>, ptr {{.*}}, align 2, !nontemporal !0
 ; CHECk: ret half %{{.*}}
 ;
 entry:
@@ -313,8 +313,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds half, half* %ddst, i64 %indvars.iv
-  %l = load half, half* %arrayidx, align 2, !nontemporal !8
+  %arrayidx = getelementptr inbounds half, ptr %ddst, i64 %indvars.iv
+  %l = load half, ptr %arrayidx, align 2, !nontemporal !8
   %add = fadd half %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -324,10 +324,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret half %add
 }
 
-define i16 @test_i16_load(i16* %ddst) {
+define i16 @test_i16_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i16_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x i16>, <4 x i16>* {{.*}}, align 2, !nontemporal !0
+; CHECK: load <4 x i16>, ptr {{.*}}, align 2, !nontemporal !0
 ; CHECk: ret i16 %{{.*}}
 ;
 entry:
@@ -336,8 +336,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i16, i16* %ddst, i64 %indvars.iv
-  %l = load i16, i16* %arrayidx, align 2, !nontemporal !8
+  %arrayidx = getelementptr inbounds i16, ptr %ddst, i64 %indvars.iv
+  %l = load i16, ptr %arrayidx, align 2, !nontemporal !8
   %add = add i16 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -347,10 +347,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i16 %add
 }
 
-define i32 @test_i32_load(i32* %ddst) {
+define i32 @test_i32_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i32_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x i32>, <4 x i32>* {{.*}}, align 4, !nontemporal !0
+; CHECK: load <4 x i32>, ptr {{.*}}, align 4, !nontemporal !0
 ; CHECk: ret i32 %{{.*}}
 ;
 entry:
@@ -359,8 +359,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %ddst, i64 %indvars.iv
-  %l = load i32, i32* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i32, ptr %ddst, i64 %indvars.iv
+  %l = load i32, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i32 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -370,7 +370,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i32 %add
 }
 
-define i33 @test_i33_load(i33* %ddst) {
+define i33 @test_i33_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i33_load
 ; CHECK-NOT:   vector.body:
 ; CHECk: ret i33 %{{.*}}
@@ -381,8 +381,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i33, i33* %ddst, i64 %indvars.iv
-  %l = load i33, i33* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i33, ptr %ddst, i64 %indvars.iv
+  %l = load i33, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i33 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -392,7 +392,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i33 %add
 }
 
-define i40 @test_i40_load(i40* %ddst) {
+define i40 @test_i40_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i40_load
 ; CHECK-NOT:   vector.body:
 ; CHECk: ret i40 %{{.*}}
@@ -403,8 +403,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i40, i40* %ddst, i64 %indvars.iv
-  %l = load i40, i40* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i40, ptr %ddst, i64 %indvars.iv
+  %l = load i40, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i40 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -414,10 +414,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i40 %add
 }
 
-define i64 @test_i64_load(i64* %ddst) {
+define i64 @test_i64_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i64_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x i64>, <4 x i64>* {{.*}}, align 4, !nontemporal !0
+; CHECK: load <4 x i64>, ptr {{.*}}, align 4, !nontemporal !0
 ; CHECk: ret i64 %{{.*}}
 ;
 entry:
@@ -426,8 +426,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %ddst, i64 %indvars.iv
-  %l = load i64, i64* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i64, ptr %ddst, i64 %indvars.iv
+  %l = load i64, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i64 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -437,10 +437,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i64 %add
 }
 
-define double @test_double_load(double* %ddst) {
+define double @test_double_load(ptr %ddst) {
 ; CHECK-LABEL: @test_double_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x double>, <4 x double>* {{.*}}, align 4, !nontemporal !0
+; CHECK: load <4 x double>, ptr {{.*}}, align 4, !nontemporal !0
 ; CHECk: ret double %{{.*}}
 ;
 entry:
@@ -449,8 +449,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %ddst, i64 %indvars.iv
-  %l = load double, double* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds double, ptr %ddst, i64 %indvars.iv
+  %l = load double, ptr %arrayidx, align 4, !nontemporal !8
   %add = fadd double %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -460,10 +460,10 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret double %add
 }
 
-define i128 @test_i128_load(i128* %ddst) {
+define i128 @test_i128_load(ptr %ddst) {
 ; CHECK-LABEL: @test_i128_load
 ; CHECK-LABEL:   vector.body:
-; CHECK: load <4 x i128>, <4 x i128>* {{.*}}, align 4, !nontemporal !0
+; CHECK: load <4 x i128>, ptr {{.*}}, align 4, !nontemporal !0
 ; CHECk: ret i128 %{{.*}}
 ;
 entry:
@@ -472,8 +472,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ddst, i64 %indvars.iv
-  %l = load i128, i128* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i128, ptr %ddst, i64 %indvars.iv
+  %l = load i128, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i128 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
@@ -483,7 +483,7 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret i128 %add
 }
 
-define i256 @test_256_load(i256* %ddst) {
+define i256 @test_256_load(ptr %ddst) {
 ; CHECK-LABEL: @test_256_load
 ; CHECK-NOT:   vector.body:
 ; CHECk: ret i256 %{{.*}}
@@ -494,8 +494,8 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds i256, i256* %ddst, i64 %indvars.iv
-  %l = load i256, i256* %arrayidx, align 4, !nontemporal !8
+  %arrayidx = getelementptr inbounds i256, ptr %ddst, i64 %indvars.iv
+  %l = load i256, ptr %arrayidx, align 4, !nontemporal !8
   %add = add i256 %l, %acc.08
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, 4092

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
index 6379c4abea144..2849c83974438 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
@@ -9,12 +9,12 @@ target triple = "aarch64-arm-none-eabi"
 ; It should also not be interleaved as the predicated interleaving will just
 ; create less efficient code.
 
-define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noundef %srcALen, half* nocapture noundef readonly %pSrcB, i32 noundef %srcBLen, half* nocapture noundef writeonly %pDst) {
+define void @arm_correlate_f16(ptr nocapture noundef readonly %pSrcA, i32 noundef %srcALen, ptr nocapture noundef readonly %pSrcB, i32 noundef %srcBLen, ptr nocapture noundef writeonly %pDst) {
 ; CHECK-LABEL: @arm_correlate_f16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[SRCBLEN:%.*]], -1
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = zext i32 [[SUB]] to i64
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds half, half* [[PSRCB:%.*]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds half, ptr [[PSRCB:%.*]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SRCALEN:%.*]], -2
 ; CHECK-NEXT:    [[SUB1:%.*]] = add i32 [[ADD]], [[SRCBLEN]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SRCALEN]], [[SRCBLEN]]
@@ -22,7 +22,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[SUB2:%.*]] = sub i32 [[SRCALEN]], [[SRCBLEN]]
 ; CHECK-NEXT:    [[IDX_EXT3:%.*]] = zext i32 [[SUB2]] to i64
-; CHECK-NEXT:    [[ADD_PTR4:%.*]] = getelementptr inbounds half, half* [[PDST:%.*]], i64 [[IDX_EXT3]]
+; CHECK-NEXT:    [[ADD_PTR4:%.*]] = getelementptr inbounds half, ptr [[PDST:%.*]], i64 [[IDX_EXT3]]
 ; CHECK-NEXT:    br label [[IF_END12:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult i32 [[SRCALEN]], [[SRCBLEN]]
@@ -30,16 +30,16 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 ; CHECK:       if.then6:
 ; CHECK-NEXT:    [[SUB7:%.*]] = add i32 [[SRCALEN]], -1
 ; CHECK-NEXT:    [[IDX_EXT8:%.*]] = zext i32 [[SUB7]] to i64
-; CHECK-NEXT:    [[ADD_PTR9:%.*]] = getelementptr inbounds half, half* [[PSRCA:%.*]], i64 [[IDX_EXT8]]
+; CHECK-NEXT:    [[ADD_PTR9:%.*]] = getelementptr inbounds half, ptr [[PSRCA:%.*]], i64 [[IDX_EXT8]]
 ; CHECK-NEXT:    [[IDX_EXT10:%.*]] = zext i32 [[SUB1]] to i64
-; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds half, half* [[PDST]], i64 [[IDX_EXT10]]
+; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds half, ptr [[PDST]], i64 [[IDX_EXT10]]
 ; CHECK-NEXT:    br label [[IF_END12]]
 ; CHECK:       if.end12:
 ; CHECK-NEXT:    [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN]], [[IF_THEN]] ], [ [[SRCBLEN]], [[IF_THEN6]] ], [ [[SRCALEN]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_THEN6]] ], [ [[SRCBLEN]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[PDST_ADDR_0:%.*]] = phi half* [ [[ADD_PTR4]], [[IF_THEN]] ], [ [[ADD_PTR11]], [[IF_THEN6]] ], [ [[PDST]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[PIN1_0:%.*]] = phi half* [ [[PSRCA]], [[IF_THEN]] ], [ [[PSRCB]], [[IF_THEN6]] ], [ [[PSRCA]], [[IF_ELSE]] ]
-; CHECK-NEXT:    [[PIN2_0:%.*]] = phi half* [ [[ADD_PTR]], [[IF_THEN]] ], [ [[ADD_PTR9]], [[IF_THEN6]] ], [ [[ADD_PTR]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[PDST_ADDR_0:%.*]] = phi ptr [ [[ADD_PTR4]], [[IF_THEN]] ], [ [[ADD_PTR11]], [[IF_THEN6]] ], [ [[PDST]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[PIN1_0:%.*]] = phi ptr [ [[PSRCA]], [[IF_THEN]] ], [ [[PSRCB]], [[IF_THEN6]] ], [ [[PSRCA]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[PIN2_0:%.*]] = phi ptr [ [[ADD_PTR]], [[IF_THEN]] ], [ [[ADD_PTR9]], [[IF_THEN6]] ], [ [[ADD_PTR]], [[IF_ELSE]] ]
 ; CHECK-NEXT:    [[CMP27:%.*]] = phi i64 [ 1, [[IF_THEN]] ], [ -1, [[IF_THEN6]] ], [ 1, [[IF_ELSE]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[SRCBLEN]], [[SRCALEN]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
@@ -47,7 +47,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 ; CHECK:       for.cond14.preheader:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 1, [[IF_END12]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_END:%.*]] ]
 ; CHECK-NEXT:    [[I_077:%.*]] = phi i32 [ 0, [[IF_END12]] ], [ [[INC33:%.*]], [[FOR_END]] ]
-; CHECK-NEXT:    [[PDST_ADDR_176:%.*]] = phi half* [ [[PDST_ADDR_0]], [[IF_END12]] ], [ [[PDST_ADDR_2:%.*]], [[FOR_END]] ]
+; CHECK-NEXT:    [[PDST_ADDR_176:%.*]] = phi ptr [ [[PDST_ADDR_0]], [[IF_END12]] ], [ [[PDST_ADDR_2:%.*]], [[FOR_END]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY16:%.*]]
 ; CHECK:       for.body16:
 ; CHECK-NEXT:    [[J_074:%.*]] = phi i32 [ 0, [[FOR_COND14_PREHEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
@@ -59,12 +59,12 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[IF_THEN20:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then20:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[J_074]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, half* [[PIN1_0]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load half, half* [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[PIN1_0]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[SUB22:%.*]] = sub nsw i32 0, [[SUB17]]
 ; CHECK-NEXT:    [[IDXPROM23:%.*]] = sext i32 [[SUB22]] to i64
-; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds half, half* [[PIN2_0]], i64 [[IDXPROM23]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load half, half* [[ARRAYIDX24]], align 2
+; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds half, ptr [[PIN2_0]], i64 [[IDXPROM23]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[ARRAYIDX24]], align 2
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast half [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[ADD25:%.*]] = fadd fast half [[MUL]], [[SUM_073]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
@@ -75,8 +75,8 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY16]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi half [ [[SUM_1]], [[FOR_INC]] ]
-; CHECK-NEXT:    [[PDST_ADDR_2]] = getelementptr inbounds half, half* [[PDST_ADDR_176]], i64 [[CMP27]]
-; CHECK-NEXT:    store half [[SUM_1_LCSSA]], half* [[PDST_ADDR_176]], align 2
+; CHECK-NEXT:    [[PDST_ADDR_2]] = getelementptr inbounds half, ptr [[PDST_ADDR_176]], i64 [[CMP27]]
+; CHECK-NEXT:    store half [[SUM_1_LCSSA]], ptr [[PDST_ADDR_176]], align 2
 ; CHECK-NEXT:    [[INC33]] = add nuw i32 [[I_077]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND78_NOT:%.*]] = icmp eq i32 [[INC33]], [[TMP1]]
@@ -87,7 +87,7 @@ define void @arm_correlate_f16(half* nocapture noundef readonly %pSrcA, i32 noun
 entry:
   %sub = add i32 %srcBLen, -1
   %idx.ext = zext i32 %sub to i64
-  %add.ptr = getelementptr inbounds half, half* %pSrcB, i64 %idx.ext
+  %add.ptr = getelementptr inbounds half, ptr %pSrcB, i64 %idx.ext
   %add = add i32 %srcALen, -2
   %sub1 = add i32 %add, %srcBLen
   %cmp = icmp ugt i32 %srcALen, %srcBLen
@@ -96,7 +96,7 @@ entry:
 if.then:                                          ; preds = %entry
   %sub2 = sub i32 %srcALen, %srcBLen
   %idx.ext3 = zext i32 %sub2 to i64
-  %add.ptr4 = getelementptr inbounds half, half* %pDst, i64 %idx.ext3
+  %add.ptr4 = getelementptr inbounds half, ptr %pDst, i64 %idx.ext3
   br label %if.end12
 
 if.else:                                          ; preds = %entry
@@ -106,17 +106,17 @@ if.else:                                          ; preds = %entry
 if.then6:                                         ; preds = %if.else
   %sub7 = add i32 %srcALen, -1
   %idx.ext8 = zext i32 %sub7 to i64
-  %add.ptr9 = getelementptr inbounds half, half* %pSrcA, i64 %idx.ext8
+  %add.ptr9 = getelementptr inbounds half, ptr %pSrcA, i64 %idx.ext8
   %idx.ext10 = zext i32 %sub1 to i64
-  %add.ptr11 = getelementptr inbounds half, half* %pDst, i64 %idx.ext10
+  %add.ptr11 = getelementptr inbounds half, ptr %pDst, i64 %idx.ext10
   br label %if.end12
 
 if.end12:                                         ; preds = %if.else, %if.then6, %if.then
   %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.then6 ], [ %srcALen, %if.else ]
   %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.then6 ], [ %srcBLen, %if.else ]
-  %pDst.addr.0 = phi half* [ %add.ptr4, %if.then ], [ %add.ptr11, %if.then6 ], [ %pDst, %if.else ]
-  %pIn1.0 = phi half* [ %pSrcA, %if.then ], [ %pSrcB, %if.then6 ], [ %pSrcA, %if.else ]
-  %pIn2.0 = phi half* [ %add.ptr, %if.then ], [ %add.ptr9, %if.then6 ], [ %add.ptr, %if.else ]
+  %pDst.addr.0 = phi ptr [ %add.ptr4, %if.then ], [ %add.ptr11, %if.then6 ], [ %pDst, %if.else ]
+  %pIn1.0 = phi ptr [ %pSrcA, %if.then ], [ %pSrcB, %if.then6 ], [ %pSrcA, %if.else ]
+  %pIn2.0 = phi ptr [ %add.ptr, %if.then ], [ %add.ptr9, %if.then6 ], [ %add.ptr, %if.else ]
   %cmp27 = phi i64 [ 1, %if.then ], [ -1, %if.then6 ], [ 1, %if.else ]
   %0 = add i32 %srcBLen, %srcALen
   %1 = add i32 %0, -1
@@ -125,7 +125,7 @@ if.end12:                                         ; preds = %if.else, %if.then6,
 for.cond14.preheader:                             ; preds = %if.end12, %for.end
   %indvars.iv = phi i32 [ 1, %if.end12 ], [ %indvars.iv.next, %for.end ]
   %i.077 = phi i32 [ 0, %if.end12 ], [ %inc33, %for.end ]
-  %pDst.addr.176 = phi half* [ %pDst.addr.0, %if.end12 ], [ %pDst.addr.2, %for.end ]
+  %pDst.addr.176 = phi ptr [ %pDst.addr.0, %if.end12 ], [ %pDst.addr.2, %for.end ]
   br label %for.body16
 
 for.body16:                                       ; preds = %for.cond14.preheader, %for.inc
@@ -139,12 +139,12 @@ for.body16:                                       ; preds = %for.cond14.preheade
 
 if.then20:                                        ; preds = %for.body16
   %idxprom = zext i32 %j.074 to i64
-  %arrayidx = getelementptr inbounds half, half* %pIn1.0, i64 %idxprom
-  %2 = load half, half* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds half, ptr %pIn1.0, i64 %idxprom
+  %2 = load half, ptr %arrayidx, align 2
   %sub22 = sub nsw i32 0, %sub17
   %idxprom23 = sext i32 %sub22 to i64
-  %arrayidx24 = getelementptr inbounds half, half* %pIn2.0, i64 %idxprom23
-  %3 = load half, half* %arrayidx24, align 2
+  %arrayidx24 = getelementptr inbounds half, ptr %pIn2.0, i64 %idxprom23
+  %3 = load half, ptr %arrayidx24, align 2
   %mul = fmul fast half %3, %2
   %add25 = fadd fast half %mul, %sum.073
   br label %for.inc
@@ -157,8 +157,8 @@ for.inc:                                          ; preds = %for.body16, %if.the
 
 for.end:                                          ; preds = %for.inc
   %sum.1.lcssa = phi half [ %sum.1, %for.inc ]
-  %pDst.addr.2 = getelementptr inbounds half, half* %pDst.addr.176, i64 %cmp27
-  store half %sum.1.lcssa, half* %pDst.addr.176, align 2
+  %pDst.addr.2 = getelementptr inbounds half, ptr %pDst.addr.176, i64 %cmp27
+  store half %sum.1.lcssa, ptr %pDst.addr.176, align 2
   %inc33 = add nuw i32 %i.077, 1
   %indvars.iv.next = add i32 %indvars.iv, 1
   %exitcond78.not = icmp eq i32 %inc33, %1

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
index dcd3d96416a55..342b37710b653 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
@@ -4,24 +4,24 @@
 target triple = "aarch64-linux-gnu"
 
 ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
-define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_sve_i128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_sve_i128
 ; CHECK: vector.body
-; CHECK:  %[[LOAD1:.*]] = load i128, i128* {{.*}}
-; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
+; CHECK:  %[[LOAD1:.*]] = load i128, ptr {{.*}}
+; CHECK-NEXT: %[[LOAD2:.*]] = load i128, ptr {{.*}}
 ; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
 ; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
-; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
-; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
+; CHECK-NEXT: store i128 %[[ADD1]], ptr {{.*}}
+; CHECK-NEXT: store i128 %[[ADD2]], ptr {{.*}}
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  %0 = load i128, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  %0 = load i128, ptr %arrayidx, align 16
   %add = add nsw i128 %0, 42
-  store i128 %add, i128* %arrayidx, align 16
+  store i128 %add, ptr %arrayidx, align 16
   %iv.next = add i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -31,24 +31,24 @@ for.end:
 }
 
 ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
-define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_sve_f128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_sve_f128
 ; CHECK: vector.body
-; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
-; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
+; CHECK: %[[LOAD1:.*]] = load fp128, ptr
+; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, ptr
 ; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
 ; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
-; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
-; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
+; CHECK-NEXT: store fp128 %[[FSUB1]], ptr {{.*}}
+; CHECK-NEXT: store fp128 %[[FSUB2]], ptr {{.*}}
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
-  %0 = load fp128, fp128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds fp128, ptr %ptr, i64 %iv
+  %0 = load fp128, ptr %arrayidx, align 16
   %add = fsub fp128 %0, 0xL00000000000000008000000000000000
-  store fp128 %add, fp128* %arrayidx, align 16
+  store fp128 %add, ptr %arrayidx, align 16
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -58,20 +58,20 @@ for.end:
 }
 
 ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
-define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
+define dso_local void @loop_invariant_sve_i128(ptr nocapture %ptr, i128 %val, i64 %N) {
 ; CHECK-LABEL: @loop_invariant_sve_i128
 ; CHECK: vector.body
-; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
-; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
-; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
-; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
+; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, ptr %ptr
+; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, ptr %ptr
+; CHECK-NEXT: store i128 %val, ptr %[[GEP1]]
+; CHECK-NEXT: store i128 %val, ptr %[[GEP2]]
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  store i128 %val, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  store i128 %val, ptr %arrayidx, align 16
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -81,25 +81,25 @@ for.end:
 }
 
 ; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
-define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
+define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1
 ; CHECK: vector.body
-; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x i64*> {{.*}}, i64 1
-; CHECK: %[[ICMP:.*]] = icmp eq <64 x i64*> %[[GEP]], %[[SPLAT:.*]]
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, <64 x ptr> {{.*}}, i64 1
+; CHECK: %[[ICMP:.*]] = icmp eq <64 x ptr> %[[GEP]], %[[SPLAT:.*]]
 ; CHECK: %[[EXTRACT1:.*]] = extractelement <64 x i1> %[[ICMP]], i32 63
-; CHECK: store i1 %[[EXTRACT1]], i1* %dst
+; CHECK: store i1 %[[EXTRACT1]], ptr %dst
 ; CHECK-NOT: vscale
 entry:
   br label %for.body
 
 for.body:
-  %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ]
+  %first.sroa = phi ptr [ %incdec.ptr, %for.body ], [ %start, %entry ]
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %iv.next = add i64 %iv, 1
-  %0 = load i64, i64* %first.sroa
-  %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1
-  %cmp.not = icmp eq i64* %incdec.ptr, %start
-  store i1 %cmp.not, i1* %dst
+  %0 = load i64, ptr %first.sroa
+  %incdec.ptr = getelementptr inbounds i64, ptr %first.sroa, i64 1
+  %cmp.not = icmp eq ptr %incdec.ptr, %start
+  store i1 %cmp.not, ptr %dst
   %cmp = icmp ult i64 %iv, %N
   br i1 %cmp, label %for.body, label %end, !llvm.loop !0
 
@@ -107,21 +107,21 @@ end:
   ret void
 }
 
-define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_fixed_width_i128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_fixed_width_i128
-; CHECK: load <4 x i128>, <4 x i128>*
+; CHECK: load <4 x i128>, ptr
 ; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
-; CHECK: store <4 x i128> {{.*}} <4 x i128>*
+; CHECK: store <4 x i128> {{.*}} ptr
 ; CHECK-NOT: vscale
 entry:
   br label %for.body
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  %0 = load i128, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  %0 = load i128, ptr %arrayidx, align 16
   %add = add nsw i128 %0, 42
-  store i128 %add, i128* %arrayidx, align 16
+  store i128 %add, ptr %arrayidx, align 16
   %iv.next = add i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
index 90aadeca94cb2..e7a361ecdbff0 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll
@@ -5,63 +5,63 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
 
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i16, i16* %arrayidx, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i16, ptr %arrayidx, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv = sext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %cmp2 = icmp sgt i32 %conv, %conv1
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %cmp2, label %if.then, label %for.inc
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %conv6 = add i16 %1, %0
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i16 %conv6, i16* %arrayidx7, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i16 %conv6, ptr %arrayidx7, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   br label %for.inc
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %exitcond.not = icmp eq i32 %inc, %n
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 ; CHECK: LV: Scalar loop costs: 5.
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %1 = load i16, i16* %arrayidx, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
+; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %1 = load i16, ptr %arrayidx, align 2
 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction:   %conv = sext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 20 for VF 2 For instruction:   %cmp2 = icmp sgt i32 %conv, %conv1
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   br i1 %cmp2, label %if.then, label %for.inc
 ; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction:   %conv6 = add i16 %1, %0
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
-; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction:   store i16 %conv6, i16* %arrayidx7, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
+; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction:   store i16 %conv6, ptr %arrayidx7, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   br label %for.inc
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %exitcond.not = icmp eq i32 %inc, %n
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 ; CHECK: LV: Vector loop of width 2 costs: 43.
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i16, i16* %arrayidx, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i16, ptr %arrayidx, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv = sext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %cmp2 = icmp sgt i32 %conv, %conv1
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %cmp2, label %if.then, label %for.inc
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %conv6 = add i16 %1, %0
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   store i16 %conv6, i16* %arrayidx7, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   store i16 %conv6, ptr %arrayidx7, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   br label %for.inc
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %exitcond.not = icmp eq i32 %inc, %n
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 ; CHECK: LV: Vector loop of width 4 costs: 2.
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
-; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load i16, i16* %arrayidx, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load i16, ptr %arrayidx, align 2
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %conv = sext i16 %1 to i32
 ; CHECK: LV: Found an estimated cost of 36 for VF 8 For instruction:   %cmp2 = icmp sgt i32 %conv, %conv1
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   br i1 %cmp2, label %if.then, label %for.inc
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %conv6 = add i16 %1, %0
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
-; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   store i16 %conv6, i16* %arrayidx7, align 2
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   store i16 %conv6, ptr %arrayidx7, align 2
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   br label %for.inc
 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction:   %inc = add nuw nsw i32 %i.016, 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction:   %exitcond.not = icmp eq i32 %inc, %n
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 ; CHECK: LV: Vector loop of width 8 costs: 5.
 ; CHECK: LV: Selecting VF: 4.
-define void @expensive_icmp(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
+define void @expensive_icmp(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n, i16 zeroext %m) #0 {
 entry:
   %cmp15 = icmp sgt i32 %n, 0
   br i1 %cmp15, label %for.body.lr.ph, label %for.cond.cleanup
@@ -76,16 +76,16 @@ for.cond.cleanup:                                 ; preds = %for.inc, %entry
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.inc
   %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-  %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016
-  %1 = load i16, i16* %arrayidx, align 2
+  %arrayidx = getelementptr inbounds i16, ptr %s, i32 %i.016
+  %1 = load i16, ptr %arrayidx, align 2
   %conv = sext i16 %1 to i32
   %cmp2 = icmp sgt i32 %conv, %conv1
   br i1 %cmp2, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
   %conv6 = add i16 %1, %0
-  %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016
-  store i16 %conv6, i16* %arrayidx7, align 2
+  %arrayidx7 = getelementptr inbounds i16, ptr %d, i32 %i.016
+  store i16 %conv6, ptr %arrayidx7, align 2
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
@@ -95,112 +95,112 @@ for.inc:                                          ; preds = %for.body, %if.then
 }
 
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i8, i8* %pSrcA.addr.011, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %0 = load i8, ptr %pSrcA.addr.011, align 1
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv1 = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i8, i8* %pSrcB.addr.09, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %1 = load i8, ptr %pSrcB.addr.09, align 1
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv3 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %mul = mul nsw i32 %conv3, %conv1
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %shr = ashr i32 %mul, 7
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %2 = icmp slt i32 %shr, 127
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %spec.select.i = select i1 %2, i32 %shr, i32 127
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %conv4 = trunc i32 %spec.select.i to i8
-; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %conv4, i8* %pDst.addr.010, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %conv4, ptr %pDst.addr.010, align 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %dec = add i32 %blkCnt.012, -1
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %cmp.not = icmp eq i32 %dec, 0
 ; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction:   br i1 %cmp.not, label %while.end.loopexit, label %while.body
 ; CHECK: LV: Scalar loop costs: 9.
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %0 = load i8, i8* %pSrcA.addr.011, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %0 = load i8, ptr %pSrcA.addr.011, align 1
 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction:   %conv1 = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %1 = load i8, i8* %pSrcB.addr.09, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %1 = load i8, ptr %pSrcB.addr.09, align 1
 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction:   %conv3 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction:   %mul = mul nsw i32 %conv3, %conv1
 ; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   %shr = ashr i32 %mul, 7
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %2 = icmp slt i32 %shr, 127
 ; CHECK: LV: Found an estimated cost of 22 for VF 2 For instruction:   %spec.select.i = select i1 %2, i32 %shr, i32 127
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %conv4 = trunc i32 %spec.select.i to i8
-; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   store i8 %conv4, i8* %pDst.addr.010, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction:   store i8 %conv4, ptr %pDst.addr.010, align 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %dec = add i32 %blkCnt.012, -1
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cmp.not = icmp eq i32 %dec, 0
 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction:   br i1 %cmp.not, label %while.end.loopexit, label %while.body
 ; CHECK: LV: Vector loop of width 2 costs: 65.
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i8, i8* %pSrcA.addr.011, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %0 = load i8, ptr %pSrcA.addr.011, align 1
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv1 = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i8, i8* %pSrcB.addr.09, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %1 = load i8, ptr %pSrcB.addr.09, align 1
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv3 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %mul = mul nsw i32 %conv3, %conv1
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %shr = ashr i32 %mul, 7
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %2 = icmp slt i32 %shr, 127
 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   %spec.select.i = select i1 %2, i32 %shr, i32 127
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %conv4 = trunc i32 %spec.select.i to i8
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   store i8 %conv4, i8* %pDst.addr.010, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction:   store i8 %conv4, ptr %pDst.addr.010, align 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %dec = add i32 %blkCnt.012, -1
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cmp.not = icmp eq i32 %dec, 0
 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %cmp.not, label %while.end.loopexit, label %while.body
 ; CHECK: LV: Vector loop of width 4 costs: 3.
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %0 = load i8, i8* %pSrcA.addr.011, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %0 = load i8, ptr %pSrcA.addr.011, align 1
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %conv1 = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load i8, i8* %pSrcB.addr.09, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %1 = load i8, ptr %pSrcB.addr.09, align 1
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %conv3 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction:   %mul = mul nsw i32 %conv3, %conv1
 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction:   %shr = ashr i32 %mul, 7
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %2 = icmp slt i32 %shr, 127
 ; CHECK: LV: Found an estimated cost of 4 for VF 8 For instruction:   %spec.select.i = select i1 %2, i32 %shr, i32 127
 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   %conv4 = trunc i32 %spec.select.i to i8
-; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   store i8 %conv4, i8* %pDst.addr.010, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction:   store i8 %conv4, ptr %pDst.addr.010, align 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction:   %dec = add i32 %blkCnt.012, -1
 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction:   %cmp.not = icmp eq i32 %dec, 0
 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction:   br i1 %cmp.not, label %while.end.loopexit, label %while.body
 ; CHECK: LV: Vector loop of width 8 costs: 3.
 ; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %0 = load i8, i8* %pSrcA.addr.011, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %0 = load i8, ptr %pSrcA.addr.011, align 1
 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction:   %conv1 = sext i8 %0 to i32
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %1 = load i8, i8* %pSrcB.addr.09, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   %1 = load i8, ptr %pSrcB.addr.09, align 1
 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction:   %conv3 = sext i8 %1 to i32
 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction:   %mul = mul nsw i32 %conv3, %conv1
 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction:   %shr = ashr i32 %mul, 7
 ; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %2 = icmp slt i32 %shr, 127
 ; CHECK: LV: Found an estimated cost of 8 for VF 16 For instruction:   %spec.select.i = select i1 %2, i32 %shr, i32 127
 ; CHECK: LV: Found an estimated cost of 6 for VF 16 For instruction:   %conv4 = trunc i32 %spec.select.i to i8
-; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   store i8 %conv4, i8* %pDst.addr.010, align 1
+; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction:   store i8 %conv4, ptr %pDst.addr.010, align 1
 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %dec = add i32 %blkCnt.012, -1
 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %cmp.not = icmp eq i32 %dec, 0
 ; CHECK: LV: Found an estimated cost of 0 for VF 16 For instruction:   br i1 %cmp.not, label %while.end.loopexit, label %while.body
 ; CHECK: LV: Vector loop of width 16 costs: 3.
 ; CHECK: LV: Selecting VF: 16.
-define void @cheap_icmp(i8* nocapture readonly %pSrcA, i8* nocapture readonly %pSrcB, i8* nocapture %pDst, i32 %blockSize) #0 {
+define void @cheap_icmp(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture %pDst, i32 %blockSize) #0 {
 entry:
   %cmp.not8 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not8, label %while.end, label %while.body.preheader
@@ -210,22 +210,22 @@ while.body.preheader:                             ; preds = %entry
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-  %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
-  %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
-  %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1
-  %0 = load i8, i8* %pSrcA.addr.011, align 1
+  %pSrcA.addr.011 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ]
+  %pDst.addr.010 = phi ptr [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ]
+  %pSrcB.addr.09 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %pSrcA.addr.011, i32 1
+  %0 = load i8, ptr %pSrcA.addr.011, align 1
   %conv1 = sext i8 %0 to i32
-  %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1
-  %1 = load i8, i8* %pSrcB.addr.09, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, ptr %pSrcB.addr.09, i32 1
+  %1 = load i8, ptr %pSrcB.addr.09, align 1
   %conv3 = sext i8 %1 to i32
   %mul = mul nsw i32 %conv3, %conv1
   %shr = ashr i32 %mul, 7
   %2 = icmp slt i32 %shr, 127
   %spec.select.i = select i1 %2, i32 %shr, i32 127
   %conv4 = trunc i32 %spec.select.i to i8
-  %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1
-  store i8 %conv4, i8* %pDst.addr.010, align 1
+  %incdec.ptr5 = getelementptr inbounds i8, ptr %pDst.addr.010, i32 1
+  store i8 %conv4, ptr %pDst.addr.010, align 1
   %dec = add i32 %blkCnt.012, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end.loopexit, label %while.body
@@ -240,22 +240,22 @@ while.end:                                        ; preds = %while.end.loopexit,
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %cmp1 = fcmp
 ; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction:   %cmp1 = fcmp
 ; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction:   %cmp1 = fcmp
-define void @floatcmp(float* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 {
+define void @floatcmp(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
 entry:
   %cmp.not7 = icmp eq i32 %blockSize, 0
   br i1 %cmp.not7, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
-  %pSrc.addr.010 = phi float* [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ]
+  %pSrc.addr.010 = phi ptr [ %incdec.ptr2, %while.body ], [ %pSrc, %entry ]
   %blockSize.addr.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pDst.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %pDst, %entry ]
-  %0 = load float, float* %pSrc.addr.010, align 4
+  %pDst.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pDst, %entry ]
+  %0 = load float, ptr %pSrc.addr.010, align 4
   %cmp1 = fcmp nnan ninf nsz olt float %0, 0.000000e+00
   %cond = select nnan ninf nsz i1 %cmp1, float 1.000000e+01, float %0
   %conv = fptosi float %cond to i32
-  %incdec.ptr = getelementptr inbounds i32, i32* %pDst.addr.08, i32 1
-  store i32 %conv, i32* %pDst.addr.08, align 4
-  %incdec.ptr2 = getelementptr inbounds float, float* %pSrc.addr.010, i32 1
+  %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.08, i32 1
+  store i32 %conv, ptr %pDst.addr.08, align 4
+  %incdec.ptr2 = getelementptr inbounds float, ptr %pSrc.addr.010, i32 1
   %dec = add i32 %blockSize.addr.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
index 5fa71e4b781a6..c1492ec5dbb74 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
@@ -4,11 +4,11 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
 
-define void @arm_abs_q7(i8* nocapture readonly %pSrc, i8* nocapture %pDst, i32 %blockSize) #0 {
+define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @arm_abs_q7(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint i8* [[PSRC:%.*]] to i32
-; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint i8* [[PDST:%.*]] to i32
+; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
+; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
 ; CHECK-NEXT:    [[CMP_NOT19:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
@@ -19,47 +19,45 @@ define void @arm_abs_q7(i8* nocapture readonly %pSrc, i8* nocapture %pDst, i32 %
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -16
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[N_VEC]]
 ; CHECK-NEXT:    [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 15
-; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, i8* [[PDST]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, i8* [[PDST]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[NEXT_GEP7]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP6]], <16 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]]
+; CHECK-NEXT:    store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i8* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[PSRC_ADDR_022:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_022:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[BLKCNT_021:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_020:%.*]] = phi i8* [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PSRC_ADDR_022]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i8, i8* [[PSRC_ADDR_022]], align 1
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[TMP9]], 0
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i8 [[TMP9]], -128
-; CHECK-NEXT:    [[SUB:%.*]] = sub i8 0, [[TMP9]]
+; CHECK-NEXT:    [[PDST_ADDR_020:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_022]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[PSRC_ADDR_022]], align 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[TMP7]], 0
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i8 [[TMP7]], -128
+; CHECK-NEXT:    [[SUB:%.*]] = sub i8 0, [[TMP7]]
 ; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP5]], i8 127, i8 [[SUB]]
-; CHECK-NEXT:    [[COND11:%.*]] = select i1 [[CMP1]], i8 [[TMP9]], i8 [[COND]]
-; CHECK-NEXT:    [[INCDEC_PTR13]] = getelementptr inbounds i8, i8* [[PDST_ADDR_020]], i32 1
-; CHECK-NEXT:    store i8 [[COND11]], i8* [[PDST_ADDR_020]], align 1
+; CHECK-NEXT:    [[COND11:%.*]] = select i1 [[CMP1]], i8 [[TMP7]], i8 [[COND]]
+; CHECK-NEXT:    [[INCDEC_PTR13]] = getelementptr inbounds i8, ptr [[PDST_ADDR_020]], i32 1
+; CHECK-NEXT:    store i8 [[COND11]], ptr [[PDST_ADDR_020]], align 1
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_021]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -74,18 +72,18 @@ while.body.preheader:                             ; preds = %entry
   br label %while.body
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
-  %pSrc.addr.022 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
+  %pSrc.addr.022 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
   %blkCnt.021 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-  %pDst.addr.020 = phi i8* [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %pSrc.addr.022, i32 1
-  %0 = load i8, i8* %pSrc.addr.022, align 1
+  %pDst.addr.020 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.022, i32 1
+  %0 = load i8, ptr %pSrc.addr.022, align 1
   %cmp1 = icmp sgt i8 %0, 0
   %cmp5 = icmp eq i8 %0, -128
   %sub = sub i8 0, %0
   %cond = select i1 %cmp5, i8 127, i8 %sub
   %cond11 = select i1 %cmp1, i8 %0, i8 %cond
-  %incdec.ptr13 = getelementptr inbounds i8, i8* %pDst.addr.020, i32 1
-  store i8 %cond11, i8* %pDst.addr.020, align 1
+  %incdec.ptr13 = getelementptr inbounds i8, ptr %pDst.addr.020, i32 1
+  store i8 %cond11, ptr %pDst.addr.020, align 1
   %dec = add i32 %blkCnt.021, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end.loopexit, label %while.body
@@ -97,11 +95,11 @@ while.end:                                        ; preds = %while.end.loopexit,
   ret void
 }
 
-define void @arm_abs_q15(i16* nocapture readonly %pSrc, i16* nocapture %pDst, i32 %blockSize) #0 {
+define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @arm_abs_q15(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint i16* [[PSRC:%.*]] to i32
-; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint i16* [[PDST:%.*]] to i32
+; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
+; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
 ; CHECK-NEXT:    [[CMP_NOT20:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
@@ -112,47 +110,49 @@ define void @arm_abs_q15(i16* nocapture readonly %pSrc, i16* nocapture %pDst, i3
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 7
-; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i16, i16* [[PDST]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i16, i16* [[PDST]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[NEXT_GEP7]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP6]], <8 x i16>* [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+; CHECK-NEXT:    [[TMP7:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP6]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP5]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP8]]
+; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr [[NEXT_GEP7]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i16* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[PSRC_ADDR_023:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_023:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[BLKCNT_022:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_021:%.*]] = phi i16* [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRC_ADDR_023]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[PSRC_ADDR_023]], align 2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[TMP9]], 0
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i16 [[TMP9]], -32768
-; CHECK-NEXT:    [[SUB:%.*]] = sub i16 0, [[TMP9]]
+; CHECK-NEXT:    [[PDST_ADDR_021:%.*]] = phi ptr [ [[INCDEC_PTR13:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRC_ADDR_023]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[PSRC_ADDR_023]], align 2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[TMP11]], 0
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i16 [[TMP11]], -32768
+; CHECK-NEXT:    [[SUB:%.*]] = sub i16 0, [[TMP11]]
 ; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP5]], i16 32767, i16 [[SUB]]
-; CHECK-NEXT:    [[COND11:%.*]] = select i1 [[CMP1]], i16 [[TMP9]], i16 [[COND]]
-; CHECK-NEXT:    [[INCDEC_PTR13]] = getelementptr inbounds i16, i16* [[PDST_ADDR_021]], i32 1
-; CHECK-NEXT:    store i16 [[COND11]], i16* [[PDST_ADDR_021]], align 2
+; CHECK-NEXT:    [[COND11:%.*]] = select i1 [[CMP1]], i16 [[TMP11]], i16 [[COND]]
+; CHECK-NEXT:    [[INCDEC_PTR13]] = getelementptr inbounds i16, ptr [[PDST_ADDR_021]], i32 1
+; CHECK-NEXT:    store i16 [[COND11]], ptr [[PDST_ADDR_021]], align 2
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_022]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -167,18 +167,18 @@ while.body.preheader:                             ; preds = %entry
   br label %while.body
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
-  %pSrc.addr.023 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
+  %pSrc.addr.023 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
   %blkCnt.022 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-  %pDst.addr.021 = phi i16* [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.023, i32 1
-  %0 = load i16, i16* %pSrc.addr.023, align 2
+  %pDst.addr.021 = phi ptr [ %incdec.ptr13, %while.body ], [ %pDst, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.023, i32 1
+  %0 = load i16, ptr %pSrc.addr.023, align 2
   %cmp1 = icmp sgt i16 %0, 0
   %cmp5 = icmp eq i16 %0, -32768
   %sub = sub i16 0, %0
   %cond = select i1 %cmp5, i16 32767, i16 %sub
   %cond11 = select i1 %cmp1, i16 %0, i16 %cond
-  %incdec.ptr13 = getelementptr inbounds i16, i16* %pDst.addr.021, i32 1
-  store i16 %cond11, i16* %pDst.addr.021, align 2
+  %incdec.ptr13 = getelementptr inbounds i16, ptr %pDst.addr.021, i32 1
+  store i16 %cond11, ptr %pDst.addr.021, align 2
   %dec = add i32 %blkCnt.022, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end.loopexit, label %while.body
@@ -190,11 +190,11 @@ while.end:                                        ; preds = %while.end.loopexit,
   ret void
 }
 
-define void @arm_abs_q31(i32* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 {
+define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @arm_abs_q31(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint i32* [[PSRC:%.*]] to i32
-; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint i32* [[PDST:%.*]] to i32
+; CHECK-NEXT:    [[PSRC2:%.*]] = ptrtoint ptr [[PSRC:%.*]] to i32
+; CHECK-NEXT:    [[PDST1:%.*]] = ptrtoint ptr [[PDST:%.*]] to i32
 ; CHECK-NEXT:    [[CMP_NOT14:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT14]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
@@ -205,47 +205,49 @@ define void @arm_abs_q31(i32* nocapture readonly %pSrc, i32* nocapture %pDst, i3
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 3
-; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i32, i32* [[PDST]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP2]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[PSRC]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i32, i32* [[PDST]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP7]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP8]]
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[NEXT_GEP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i32* [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[PDST]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[PSRC_ADDR_017:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_017:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[BLKCNT_016:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_015:%.*]] = phi i32* [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[PSRC_ADDR_017]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[PSRC_ADDR_017]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 0
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP9]], -2147483648
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[TMP9]]
+; CHECK-NEXT:    [[PDST_ADDR_015:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[PSRC_ADDR_017]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[PSRC_ADDR_017]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP11]], -2147483648
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[TMP11]]
 ; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP2]], i32 2147483647, i32 [[SUB]]
-; CHECK-NEXT:    [[COND6:%.*]] = select i1 [[CMP1]], i32 [[TMP9]], i32 [[COND]]
-; CHECK-NEXT:    [[INCDEC_PTR7]] = getelementptr inbounds i32, i32* [[PDST_ADDR_015]], i32 1
-; CHECK-NEXT:    store i32 [[COND6]], i32* [[PDST_ADDR_015]], align 4
+; CHECK-NEXT:    [[COND6:%.*]] = select i1 [[CMP1]], i32 [[TMP11]], i32 [[COND]]
+; CHECK-NEXT:    [[INCDEC_PTR7]] = getelementptr inbounds i32, ptr [[PDST_ADDR_015]], i32 1
+; CHECK-NEXT:    store i32 [[COND6]], ptr [[PDST_ADDR_015]], align 4
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_016]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -260,18 +262,18 @@ while.body.preheader:                             ; preds = %entry
   br label %while.body
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
-  %pSrc.addr.017 = phi i32* [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
+  %pSrc.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %while.body.preheader ]
   %blkCnt.016 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ]
-  %pDst.addr.015 = phi i32* [ %incdec.ptr7, %while.body ], [ %pDst, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i32, i32* %pSrc.addr.017, i32 1
-  %0 = load i32, i32* %pSrc.addr.017, align 4
+  %pDst.addr.015 = phi ptr [ %incdec.ptr7, %while.body ], [ %pDst, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, ptr %pSrc.addr.017, i32 1
+  %0 = load i32, ptr %pSrc.addr.017, align 4
   %cmp1 = icmp sgt i32 %0, 0
   %cmp2 = icmp eq i32 %0, -2147483648
   %sub = sub nsw i32 0, %0
   %cond = select i1 %cmp2, i32 2147483647, i32 %sub
   %cond6 = select i1 %cmp1, i32 %0, i32 %cond
-  %incdec.ptr7 = getelementptr inbounds i32, i32* %pDst.addr.015, i32 1
-  store i32 %cond6, i32* %pDst.addr.015, align 4
+  %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.015, i32 1
+  store i32 %cond6, ptr %pDst.addr.015, align 4
   %dec = add i32 %blkCnt.016, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end.loopexit, label %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
index a180bf43f5355..0515ccea94b5b 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
@@ -12,7 +12,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; CHECK-COST: Found an estimated cost of 8 for VF 4 For instruction:   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
 ; CHECK-COST: Found an estimated cost of 2 for VF 8 For instruction:   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
 
-define void @arm_offset_q15(i16* nocapture readonly %pSrc, i16 signext %offset, i16* nocapture noalias %pDst, i32 %blockSize) #0 {
+define void @arm_offset_q15(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @arm_offset_q15(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -25,14 +25,14 @@ define void @arm_offset_q15(i16* nocapture readonly %pSrc, i16 signext %offset,
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i32 [[INDEX]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[BLOCKSIZE]])
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT7]])
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP5]] to <8 x i16>*
-; CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT7]])
+; CHECK-NEXT:    call void @llvm.masked.store.v8i16.p0(<8 x i16> [[TMP2]], ptr [[NEXT_GEP5]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[WHILE_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -45,13 +45,13 @@ entry:
 
 while.body:                                       ; preds = %entry, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pSrc.addr.08 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
-  %pDst.addr.07 = phi i16* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.08, i32 1
-  %0 = load i16, i16* %pSrc.addr.08, align 2
+  %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
+  %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1
+  %0 = load i16, ptr %pSrc.addr.08, align 2
   %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset)
-  %incdec.ptr3 = getelementptr inbounds i16, i16* %pDst.addr.07, i32 1
-  store i16 %1, i16* %pDst.addr.07, align 2
+  %incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1
+  store i16 %1, ptr %pDst.addr.07, align 2
   %dec = add i32 %blkCnt.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body
@@ -63,3 +63,5 @@ while.end:                                        ; preds = %while.body, %entry
 declare i16 @llvm.sadd.sat.i16(i16, i16)
 
 attributes #0 = { "target-features"="+mve" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COST: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index decffcb045f32..5d85a4cd73fdd 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -11,7 +11,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; CHECK-COST: LV: Found an estimated cost of 26 for VF 2 For instruction:   %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
 ; CHECK-COST: LV: Found an estimated cost of 2 for VF 4 For instruction:   %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
 
-define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB, i32 %blockSize) #0 {
+define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %blockSize) #0 {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT16:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -21,63 +21,65 @@ define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB,
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -4
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr float, float* [[PA:%.*]], i32 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr float, float* [[PB:%.*]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PA:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PB:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[IND_END3:%.*]] = and i32 [[BLOCKSIZE]], 3
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[PA]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr float, float* [[PB]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[NEXT_GEP5]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD6]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]])
-; CHECK-NEXT:    [[TMP6:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD6]])
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]]
-; CHECK-NEXT:    [[TMP9:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP8]])
-; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast <4 x float> [[TMP9]], [[TMP7]]
-; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP10]]
-; CHECK-NEXT:    [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP11]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PA]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PB]], i32 [[TMP3]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[NEXT_GEP5]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD6]], zeroinitializer
+; CHECK-NEXT:    [[DOTNOT8:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD6]])
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP9]])
+; CHECK-NEXT:    [[TMP11:%.*]] = fdiv fast <4 x float> [[TMP10]], [[TMP8]]
+; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[DOTNOT8]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP11]]
+; CHECK-NEXT:    [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP12]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI]])
+; CHECK-NEXT:    [[TMP14:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi float* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PB]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PA]], [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PB]], [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[WHILE_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP14]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[WHILE_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[PA_ADDR_020:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[PB_ADDR_019:%.*]] = phi float* [ [[INCDEC_PTR1:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PA_ADDR_020:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[PB_ADDR_019:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[BLOCKSIZE_ADDR_018:%.*]] = phi i32 [ [[DEC:%.*]], [[IF_END]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ACCUM_017:%.*]] = phi float [ [[ACCUM_1:%.*]], [[IF_END]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PA_ADDR_020]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = load float, float* [[PA_ADDR_020]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds float, float* [[PB_ADDR_019]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = load float, float* [[PB_ADDR_019]], align 4
-; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast une float [[TMP14]], 0.000000e+00
-; CHECK-NEXT:    [[CMP3:%.*]] = fcmp fast une float [[TMP15]], 0.000000e+00
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PA_ADDR_020]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = load float, ptr [[PA_ADDR_020]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds float, ptr [[PB_ADDR_019]], i32 1
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[PB_ADDR_019]], align 4
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast une float [[TMP15]], 0.000000e+00
+; CHECK-NEXT:    [[CMP3:%.*]] = fcmp fast une float [[TMP16]], 0.000000e+00
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]]
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP16:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP14]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP15]])
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP17]], [[TMP16]]
-; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[TMP14]], [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = tail call fast float @llvm.fabs.f32(float [[SUB]])
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv fast float [[TMP18]], [[ADD]]
+; CHECK-NEXT:    [[TMP18:%.*]] = tail call fast float @llvm.fabs.f32(float [[TMP16]])
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP18]], [[TMP17]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = tail call fast float @llvm.fabs.f32(float [[SUB]])
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv fast float [[TMP19]], [[ADD]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[DIV]], [[ACCUM_017]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
@@ -86,7 +88,7 @@ define float @test(float* nocapture readonly %pA, float* nocapture readonly %pB,
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       while.end:
-; CHECK-NEXT:    [[ACCUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ACCUM_1]], [[IF_END]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ACCUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ACCUM_1]], [[IF_END]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret float [[ACCUM_0_LCSSA]]
 ;
 entry:
@@ -94,14 +96,14 @@ entry:
   br i1 %cmp.not16, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %if.end
-  %pA.addr.020 = phi float* [ %incdec.ptr, %if.end ], [ %pA, %entry ]
-  %pB.addr.019 = phi float* [ %incdec.ptr1, %if.end ], [ %pB, %entry ]
+  %pA.addr.020 = phi ptr [ %incdec.ptr, %if.end ], [ %pA, %entry ]
+  %pB.addr.019 = phi ptr [ %incdec.ptr1, %if.end ], [ %pB, %entry ]
   %blockSize.addr.018 = phi i32 [ %dec, %if.end ], [ %blockSize, %entry ]
   %accum.017 = phi float [ %accum.1, %if.end ], [ 0.000000e+00, %entry ]
-  %incdec.ptr = getelementptr inbounds float, float* %pA.addr.020, i32 1
-  %0 = load float, float* %pA.addr.020, align 4
-  %incdec.ptr1 = getelementptr inbounds float, float* %pB.addr.019, i32 1
-  %1 = load float, float* %pB.addr.019, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %pA.addr.020, i32 1
+  %0 = load float, ptr %pA.addr.020, align 4
+  %incdec.ptr1 = getelementptr inbounds float, ptr %pB.addr.019, i32 1
+  %1 = load float, ptr %pB.addr.019, align 4
   %cmp2 = fcmp fast une float %0, 0.000000e+00
   %cmp3 = fcmp fast une float %1, 0.000000e+00
   %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
@@ -131,3 +133,5 @@ while.end:                                        ; preds = %if.end, %entry
 declare float @llvm.fabs.f32(float)
 
 attributes #0 = { "target-features"="+mve.fp" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COST: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll
index ca95279921ce7..3589aaa04b1a5 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-vldn.ll
@@ -9,7 +9,7 @@ target triple = "thumbv8.1m.main-none-none-eabi"
 ; CHECK-LABEL: vld2
 ; CHECK-2: vector.body
 ; CHECK-NO2-NOT: vector.body
-define void @vld2(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
+define void @vld2(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
 entry:
   %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
   %0 = bitcast i16 %tmp.0.extract.trunc to half
@@ -19,19 +19,19 @@ entry:
   br i1 %cmp26, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
-  %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
-  %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
+  %pIn.addr.029 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ]
+  %pOut.addr.028 = phi ptr [ %add.ptr7, %while.body ], [ %pOut, %entry ]
   %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
-  %1 = load half, half* %pIn.addr.029, align 2
-  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
-  %2 = load half, half* %arrayidx2, align 2
+  %1 = load half, ptr %pIn.addr.029, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.029, i32 1
+  %2 = load half, ptr %arrayidx2, align 2
   %mul3 = fmul half %1, %0
   %mul4 = fmul half %2, %0
-  store half %mul3, half* %pOut.addr.028, align 2
-  %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
-  store half %mul4, half* %arrayidx6, align 2
-  %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
-  %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
+  store half %mul3, ptr %pOut.addr.028, align 2
+  %arrayidx6 = getelementptr inbounds half, ptr %pOut.addr.028, i32 1
+  store half %mul4, ptr %arrayidx6, align 2
+  %add.ptr = getelementptr inbounds half, ptr %pIn.addr.029, i32 2
+  %add.ptr7 = getelementptr inbounds half, ptr %pOut.addr.028, i32 2
   %dec = add nsw i32 %blkCnt.027, -1
   %cmp = icmp eq i32 %dec, 0
   br i1 %cmp, label %while.end, label %while.body
@@ -43,7 +43,7 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK-LABEL: vld4
 ; CHECK-4: vector.body
 ; CHECK-NO4-NOT: vector.body
-define void @vld4(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
+define void @vld4(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
 entry:
   %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
   %0 = bitcast i16 %tmp.0.extract.trunc to half
@@ -53,29 +53,29 @@ entry:
   br i1 %cmp38, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
-  %pIn.addr.041 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
-  %pOut.addr.040 = phi half* [ %add.ptr13, %while.body ], [ %pOut, %entry ]
+  %pIn.addr.041 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ]
+  %pOut.addr.040 = phi ptr [ %add.ptr13, %while.body ], [ %pOut, %entry ]
   %blkCnt.039 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
-  %1 = load half, half* %pIn.addr.041, align 2
-  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.041, i32 1
-  %2 = load half, half* %arrayidx2, align 2
-  %arrayidx3 = getelementptr inbounds half, half* %pIn.addr.041, i32 2
-  %3 = load half, half* %arrayidx3, align 2
-  %arrayidx4 = getelementptr inbounds half, half* %pIn.addr.041, i32 3
-  %4 = load half, half* %arrayidx4, align 2
+  %1 = load half, ptr %pIn.addr.041, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.041, i32 1
+  %2 = load half, ptr %arrayidx2, align 2
+  %arrayidx3 = getelementptr inbounds half, ptr %pIn.addr.041, i32 2
+  %3 = load half, ptr %arrayidx3, align 2
+  %arrayidx4 = getelementptr inbounds half, ptr %pIn.addr.041, i32 3
+  %4 = load half, ptr %arrayidx4, align 2
   %mul5 = fmul half %1, %0
   %mul6 = fmul half %2, %0
   %mul7 = fmul half %3, %0
   %mul8 = fmul half %4, %0
-  store half %mul5, half* %pOut.addr.040, align 2
-  %arrayidx10 = getelementptr inbounds half, half* %pOut.addr.040, i32 1
-  store half %mul6, half* %arrayidx10, align 2
-  %arrayidx11 = getelementptr inbounds half, half* %pOut.addr.040, i32 2
-  store half %mul7, half* %arrayidx11, align 2
-  %arrayidx12 = getelementptr inbounds half, half* %pOut.addr.040, i32 3
-  store half %mul8, half* %arrayidx12, align 2
-  %add.ptr = getelementptr inbounds half, half* %pIn.addr.041, i32 4
-  %add.ptr13 = getelementptr inbounds half, half* %pOut.addr.040, i32 4
+  store half %mul5, ptr %pOut.addr.040, align 2
+  %arrayidx10 = getelementptr inbounds half, ptr %pOut.addr.040, i32 1
+  store half %mul6, ptr %arrayidx10, align 2
+  %arrayidx11 = getelementptr inbounds half, ptr %pOut.addr.040, i32 2
+  store half %mul7, ptr %arrayidx11, align 2
+  %arrayidx12 = getelementptr inbounds half, ptr %pOut.addr.040, i32 3
+  store half %mul8, ptr %arrayidx12, align 2
+  %add.ptr = getelementptr inbounds half, ptr %pIn.addr.041, i32 4
+  %add.ptr13 = getelementptr inbounds half, ptr %pOut.addr.040, i32 4
   %dec = add nsw i32 %blkCnt.039, -1
   %cmp = icmp eq i32 %dec, 0
   br i1 %cmp, label %while.end, label %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index d394fda439583..7277d6d3e1514 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-none-none-eabi"
 
-define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %s, i32%y) {
+define hidden void @pointer_phi_v4i32_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %s, i32%y) {
 ; CHECK-LABEL: @pointer_phi_v4i32_add1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
@@ -12,13 +12,13 @@ define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -28,14 +28,14 @@ define hidden void @pointer_phi_v4i32_add1(i32* noalias nocapture readonly %A, i
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load i32, i32* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 1
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load i32, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 1
   %add = add nsw i32 %0, %y
-  store i32 %add, i32* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1
+  store i32 %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -43,37 +43,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v4i32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v4i32_add2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 1992
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[NEXT_GEP]] to <8 x i32>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 3
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[NEXT_GEP4]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
-; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_09]], i32 2
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP5]], [[Y]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -83,14 +82,14 @@ define hidden void @pointer_phi_v4i32_add2(i32* noalias nocapture readonly %A, i
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load i32, i32* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 2
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load i32, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 2
   %add = add nsw i32 %0, %y
-  store i32 %add, i32* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1
+  store i32 %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -98,36 +97,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v4i32_add3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 2988
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 996
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 12
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_09]], i32 3
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_09]], i32 3
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], [[Y]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -137,14 +136,14 @@ define hidden void @pointer_phi_v4i32_add3(i32* noalias nocapture readonly %A, i
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi i32* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi i32* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load i32, i32* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %A.addr.09, i32 3
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load i32, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %A.addr.09, i32 3
   %add = add nsw i32 %0, %y
-  store i32 %add, i32* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.07, i32 1
+  store i32 %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -152,7 +151,7 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v8i16_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v8i16_add1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
@@ -161,13 +160,13 @@ define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -178,14 +177,14 @@ entry:
   %0 = trunc i32 %y to i16
   br label %for.body
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
-  %A.addr.011 = phi i16* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %l1 = load i16, i16* %A.addr.011, align 2
-  %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 1
+  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %l1 = load i16, ptr %A.addr.011, align 2
+  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 1
   %conv1 = add i16 %l1, %0
-  store i16 %conv1, i16* %B.addr.09, align 2
-  %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1
+  store i16 %conv1, ptr %B.addr.09, align 2
+  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
   %inc = add nuw nsw i32 %i.010, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -193,38 +192,37 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v8i16_add2(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v8i16_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v8i16_add2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 1984
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A]], i32 [[TMP1]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i16>, <16 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i16> [[WIDE_VEC]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[NEXT_GEP4]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi i16* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[L1:%.*]] = load i16, i16* [[A_ADDR_011]], align 2
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[A_ADDR_011]], i32 2
+; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[A_ADDR_011]], align 2
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 2
 ; CHECK-NEXT:    [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]]
-; CHECK-NEXT:    store i16 [[CONV1]], i16* [[B_ADDR_09]], align 2
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[B_ADDR_09]], i32 1
+; CHECK-NEXT:    store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_010]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -235,14 +233,14 @@ entry:
   %0 = trunc i32 %y to i16
   br label %for.body
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
-  %A.addr.011 = phi i16* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %l1 = load i16, i16* %A.addr.011, align 2
-  %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 2
+  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %l1 = load i16, ptr %A.addr.011, align 2
+  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 2
   %conv1 = add i16 %l1, %0
-  store i16 %conv1, i16* %B.addr.09, align 2
-  %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1
+  store i16 %conv1, ptr %B.addr.09, align 2
+  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
   %inc = add nuw nsw i32 %i.010, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -250,20 +248,20 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v8i16_add3(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v8i16_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v8i16_add3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_011:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi i16* [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[L1:%.*]] = load i16, i16* [[A_ADDR_011]], align 2
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, i16* [[A_ADDR_011]], i32 3
+; CHECK-NEXT:    [[B_ADDR_09:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[A_ADDR_011]], align 2
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i16, ptr [[A_ADDR_011]], i32 3
 ; CHECK-NEXT:    [[CONV1:%.*]] = add i16 [[L1]], [[TMP0]]
-; CHECK-NEXT:    store i16 [[CONV1]], i16* [[B_ADDR_09]], align 2
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[B_ADDR_09]], i32 1
+; CHECK-NEXT:    store i16 [[CONV1]], ptr [[B_ADDR_09]], align 2
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[B_ADDR_09]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_010]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]]
@@ -274,14 +272,14 @@ entry:
   %0 = trunc i32 %y to i16
   br label %for.body
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
-  %A.addr.011 = phi i16* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.011 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.09 = phi i16* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %l1 = load i16, i16* %A.addr.011, align 2
-  %add.ptr = getelementptr inbounds i16, i16* %A.addr.011, i32 3
+  %B.addr.09 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %l1 = load i16, ptr %A.addr.011, align 2
+  %add.ptr = getelementptr inbounds i16, ptr %A.addr.011, i32 3
   %conv1 = add i16 %l1, %0
-  store i16 %conv1, i16* %B.addr.09, align 2
-  %incdec.ptr = getelementptr inbounds i16, i16* %B.addr.09, i32 1
+  store i16 %conv1, ptr %B.addr.09, align 2
+  %incdec.ptr = getelementptr inbounds i16, ptr %B.addr.09, i32 1
   %inc = add nuw nsw i32 %i.010, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -289,36 +287,34 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v16i8_add1(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v16i8_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v16i8_add1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 992
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[NEXT_GEP]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP2]], <16 x i8>* [[TMP3]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[NEXT_GEP4]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi i8* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[A_ADDR_010]], align 1
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 1
-; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP5]], [[TMP0]]
-; CHECK-NEXT:    store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1
+; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 1
+; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP3]], [[TMP0]]
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -330,14 +326,14 @@ entry:
   br label %for.body
 
 for.body:
-  %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %1 = load i8, i8* %A.addr.010, align 1
-  %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 1
+  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %1 = load i8, ptr %A.addr.010, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 1
   %conv1 = add i8 %1, %0
-  store i8 %conv1, i8* %B.addr.08, align 1
-  %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1
+  store i8 %conv1, ptr %B.addr.08, align 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -345,38 +341,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v16i8_add2(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v16i8_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v16i8_add2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[A:%.*]], i32 1984
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[B:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 1984
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 992
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[A]], i32 [[TMP1]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP2]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP1]]
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 1
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[NEXT_GEP4]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP3]], <16 x i8>* [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[NEXT_GEP4]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi i8* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i8, i8* [[A_ADDR_010]], align 1
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 2
-; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP6]], [[TMP0]]
-; CHECK-NEXT:    store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1
+; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 2
+; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -388,14 +382,14 @@ entry:
   br label %for.body
 
 for.body:
-  %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %1 = load i8, i8* %A.addr.010, align 1
-  %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 2
+  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %1 = load i8, ptr %A.addr.010, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 2
   %conv1 = add i8 %1, %0
-  store i8 %conv1, i8* %B.addr.08, align 1
-  %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1
+  store i8 %conv1, ptr %B.addr.08, align 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -403,20 +397,20 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v16i8_add3(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i32 %y) {
+define hidden void @pointer_phi_v16i8_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v16i8_add3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_010:%.*]] = phi ptr [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[I_09:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[A_ADDR_010]], align 1
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_010]], i32 3
+; CHECK-NEXT:    [[B_ADDR_08:%.*]] = phi ptr [ [[B:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_010]], align 1
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_010]], i32 3
 ; CHECK-NEXT:    [[CONV1:%.*]] = add i8 [[TMP1]], [[TMP0]]
-; CHECK-NEXT:    store i8 [[CONV1]], i8* [[B_ADDR_08]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[B_ADDR_08]], i32 1
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[B_ADDR_08]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[B_ADDR_08]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_09]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]]
@@ -428,14 +422,14 @@ entry:
   br label %for.body
 
 for.body:
-  %A.addr.010 = phi i8* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.010 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.08 = phi i8* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %1 = load i8, i8* %A.addr.010, align 1
-  %add.ptr = getelementptr inbounds i8, i8* %A.addr.010, i32 3
+  %B.addr.08 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %1 = load i8, ptr %A.addr.010, align 1
+  %add.ptr = getelementptr inbounds i8, ptr %A.addr.010, i32 3
   %conv1 = add i8 %1, %0
-  store i8 %conv1, i8* %B.addr.08, align 1
-  %incdec.ptr = getelementptr inbounds i8, i8* %B.addr.08, i32 1
+  store i8 %conv1, ptr %B.addr.08, align 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %B.addr.08, i32 1
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -443,7 +437,7 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
+define hidden void @pointer_phi_v4f32_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
 ; CHECK-LABEL: @pointer_phi_v4f32_add1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
@@ -451,13 +445,13 @@ define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A,
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -467,14 +461,14 @@ define hidden void @pointer_phi_v4f32_add1(float* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load float, float* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 1
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load float, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 1
   %add = fadd fast float %0, %y
-  store float %add, float* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1
+  store float %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -482,37 +476,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
+define hidden void @pointer_phi_v4f32_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
 ; CHECK-LABEL: @pointer_phi_v4f32_add2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 1992
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr float, float* [[B:%.*]], i32 996
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 7968
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[NEXT_GEP]] to <8 x float>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, <8 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 3
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[NEXT_GEP4]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
-; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, float* [[A_ADDR_09]], i32 2
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP5]], [[Y]]
-; CHECK-NEXT:    store float [[ADD]], float* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]]
+; CHECK-NEXT:    store float [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -522,14 +515,14 @@ define hidden void @pointer_phi_v4f32_add2(float* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load float, float* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 2
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load float, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 2
   %add = fadd fast float %0, %y
-  store float %add, float* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1
+  store float %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -537,36 +530,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) {
+define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
 ; CHECK-LABEL: @pointer_phi_v4f32_add3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr float, float* [[A:%.*]], i32 2988
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr float, float* [[B:%.*]], i32 996
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 11952
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 3984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi float* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, float* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[NEXT_GEP]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr float, float* [[POINTER_PHI]], i32 12
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 48
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi float* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 996, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, float* [[A_ADDR_09]], i32 3
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds float, ptr [[A_ADDR_09]], i32 3
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP4]], [[Y]]
-; CHECK-NEXT:    store float [[ADD]], float* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    store float [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
@@ -576,14 +569,14 @@ define hidden void @pointer_phi_v4f32_add3(float* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi float* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi float* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load float, float* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds float, float* %A.addr.09, i32 3
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load float, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds float, ptr %A.addr.09, i32 3
   %add = fadd fast float %0, %y
-  store float %add, float* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds float, float* %B.addr.07, i32 1
+  store float %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -591,7 +584,7 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) {
+define hidden void @pointer_phi_v4half_add1(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
 ; CHECK-LABEL: @pointer_phi_v4half_add1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
@@ -599,13 +592,13 @@ define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A,
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half* [[NEXT_GEP]] to <8 x half>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x half>, <8 x half>* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>*
-; CHECK-NEXT:    store <8 x half> [[TMP1]], <8 x half>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
@@ -615,14 +608,14 @@ define hidden void @pointer_phi_v4half_add1(half* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi half* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load half, half* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 1
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load half, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 1
   %add = fadd fast half %0, %y
-  store half %add, half* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1
+  store half %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -630,37 +623,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) {
+define hidden void @pointer_phi_v4half_add2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
 ; CHECK-LABEL: @pointer_phi_v4half_add2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 1984
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr half, half* [[B:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 3968
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <16 x half>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x half>, <16 x half>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x half>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x half> [[WIDE_VEC]], <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>*
-; CHECK-NEXT:    store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4
+; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load half, half* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, half* [[A_ADDR_09]], i32 2
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP5]], [[Y]]
-; CHECK-NEXT:    store half [[ADD]], half* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, half* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]]
+; CHECK-NEXT:    store half [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
@@ -670,14 +662,14 @@ define hidden void @pointer_phi_v4half_add2(half* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi half* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load half, half* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 2
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load half, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 2
   %add = fadd fast half %0, %y
-  store half %add, half* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1
+  store half %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -685,37 +677,36 @@ end:
   ret void
 }
 
-define hidden void @pointer_phi_v4half_add3(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) {
+define hidden void @pointer_phi_v4half_add3(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, half %y) {
 ; CHECK-LABEL: @pointer_phi_v4half_add3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 2976
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr half, half* [[B:%.*]], i32 992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 5952
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 1984
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]]
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <24 x half>*
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x half>, <24 x half>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[INDEX]], 6
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP1]]
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <24 x half>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x half> [[WIDE_VEC]], <24 x half> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>*
-; CHECK-NEXT:    store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4
+; CHECK-NEXT:    store <8 x half> [[TMP2]], ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_09:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = load half, half* [[A_ADDR_09]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, half* [[A_ADDR_09]], i32 3
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP5]], [[Y]]
-; CHECK-NEXT:    store half [[ADD]], half* [[B_ADDR_07]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, half* [[B_ADDR_07]], i32 1
+; CHECK-NEXT:    [[B_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[A_ADDR_09]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds half, ptr [[A_ADDR_09]], i32 3
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP4]], [[Y]]
+; CHECK-NEXT:    store half [[ADD]], ptr [[B_ADDR_07]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds half, ptr [[B_ADDR_07]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
@@ -725,14 +716,14 @@ define hidden void @pointer_phi_v4half_add3(half* noalias nocapture readonly %A,
 entry:
   br label %for.body
 for.body:
-  %A.addr.09 = phi half* [ %add.ptr, %for.body ], [ %A, %entry ]
+  %A.addr.09 = phi ptr [ %add.ptr, %for.body ], [ %A, %entry ]
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %B.addr.07 = phi half* [ %incdec.ptr, %for.body ], [ %B, %entry ]
-  %0 = load half, half* %A.addr.09, align 4
-  %add.ptr = getelementptr inbounds half, half* %A.addr.09, i32 3
+  %B.addr.07 = phi ptr [ %incdec.ptr, %for.body ], [ %B, %entry ]
+  %0 = load half, ptr %A.addr.09, align 4
+  %add.ptr = getelementptr inbounds half, ptr %A.addr.09, i32 3
   %add = fadd fast half %0, %y
-  store half %add, half* %B.addr.07, align 4
-  %incdec.ptr = getelementptr inbounds half, half* %B.addr.07, i32 1
+  store half %add, ptr %B.addr.07, align 4
+  %incdec.ptr = getelementptr inbounds half, ptr %B.addr.07, i32 1
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body
@@ -743,46 +734,45 @@ end:
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.interleave.count", i32 2}
 
-define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %n, i32 %y) {
+define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v4i32_uf2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59952
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9992
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239808
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39968
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT6]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 0, i32 6, i32 12, i32 18>
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 24, i32 30, i32 36, i32 42>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP2]], <4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER5]], [[BROADCAST_SPLAT7]]
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 48
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992
-; CHECK-NEXT:    br i1 [[TMP7]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 192
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992
+; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9992, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[A_ADDR_08]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_08]], i32 6
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP8]], [[Y]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B_ADDR_06]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1
+; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[A_ADDR_08]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP7]], [[Y]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_06]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -795,14 +785,14 @@ for.cond.cleanup:
   ret void
 
 for.body:
-  %A.addr.08 = phi i32* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %0 = load i32, i32* %A.addr.08, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %A.addr.08, i32 6
+  %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %0 = load i32, ptr %A.addr.08, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %A.addr.08, i32 6
   %add = add nsw i32 %0, %y
-  store i32 %add, i32* %B.addr.06, align 4
-  %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.06, i32 1
+  store i32 %add, ptr %B.addr.06, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1
   %inc = add nuw nsw i32 %i.07, 1
   %exitcond = icmp eq i32 %inc, 10000
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
@@ -811,11 +801,11 @@ for.body:
 !2 = distinct !{!2, !3}
 !3 = !{!"llvm.loop.interleave.count", i32 4}
 
-define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %n, i32 %y) {
+define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %n, i32 %y) {
 ; CHECK-LABEL: @pointer_phi_v4i32_uf4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 59904
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i32, i32* [[B:%.*]], i32 9984
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 239616
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 39936
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
@@ -826,47 +816,44 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3
 ; CHECK-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 0, i32 6, i32 12, i32 18>
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 24, i32 30, i32 36, i32 42>
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 48, i32 54, i32 60, i32 66>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <4 x i32> <i32 72, i32 78, i32 84, i32 90>
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i32, i32* [[B]], i32 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT11]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT13]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]]
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[NEXT_GEP]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 4
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 8
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP12]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[NEXT_GEP]], i32 12
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 192, i32 216, i32 240, i32 264>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 288, i32 312, i32 336, i32 360>
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP4]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER7]], [[BROADCAST_SPLAT11]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER8]], [[BROADCAST_SPLAT13]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_GATHER9]], [[BROADCAST_SPLAT15]]
+; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
+; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8
+; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 96
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984
-; CHECK-NEXT:    br i1 [[TMP15]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 384
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984
+; CHECK-NEXT:    br i1 [[TMP12]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi i32* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[A_ADDR_08:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 9984, [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi i32* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[A_ADDR_08]], align 4
-; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, i32* [[A_ADDR_08]], i32 6
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP16]], [[Y]]
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[B_ADDR_06]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1
+; CHECK-NEXT:    [[B_ADDR_06:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END2]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[A_ADDR_08]], align 4
+; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[A_ADDR_08]], i32 6
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[Y]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[B_ADDR_06]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[B_ADDR_06]], i32 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
@@ -878,75 +865,75 @@ for.cond.cleanup:
   ret void
 
 for.body:
-  %A.addr.08 = phi i32* [ %A, %entry ], [ %add.ptr, %for.body ]
+  %A.addr.08 = phi ptr [ %A, %entry ], [ %add.ptr, %for.body ]
   %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %B.addr.06 = phi i32* [ %B, %entry ], [ %incdec.ptr, %for.body ]
-  %0 = load i32, i32* %A.addr.08, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %A.addr.08, i32 6
+  %B.addr.06 = phi ptr [ %B, %entry ], [ %incdec.ptr, %for.body ]
+  %0 = load i32, ptr %A.addr.08, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %A.addr.08, i32 6
   %add = add nsw i32 %0, %y
-  store i32 %add, i32* %B.addr.06, align 4
-  %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.06, i32 1
+  store i32 %add, ptr %B.addr.06, align 4
+  %incdec.ptr = getelementptr inbounds i32, ptr %B.addr.06, i32 1
   %inc = add nuw nsw i32 %i.07, 1
   %exitcond = icmp eq i32 %inc, 10000
   br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !2
 }
 
-define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias nocapture %z) {
+define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias nocapture %z) {
 ; CHECK-LABEL: @mult_ptr_iv(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[Z:%.*]], i32 3000
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[X:%.*]], i32 3000
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i8* [[SCEVGEP1]], [[Z]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i8* [[SCEVGEP]], [[X]]
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[Z:%.*]], i32 3000
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[X:%.*]], i32 3000
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[Z]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[X]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* [[X]], i32 3000
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[Z]], i32 3000
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[X]], i32 3000
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[Z]], i32 3000
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[POINTER_PHI5:%.*]] = phi i8* [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[X]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI5:%.*]] = phi ptr [ [[Z]], [[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope !28
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], <i8 10, i8 10, i8 10, i8 10>
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> [[TMP1]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 2
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP5]], <4 x ptr> [[TMP7]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP6]], <4 x ptr> [[TMP8]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !alias.scope !31, !noalias !28
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12
-; CHECK-NEXT:    [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
+; CHECK-NEXT:    [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI5]], i32 12
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
+; CHECK-NEXT:    [[X_ADDR_050:%.*]] = phi ptr [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[Z_ADDR_049:%.*]] = phi ptr [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
 ; CHECK-NEXT:    [[I_048:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = load i8, i8* [[X_ADDR_050]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, i8* [[X_ADDR_050]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[INCDEC_PTR1]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[X_ADDR_050]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 2
+; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR2]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 3
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[INCDEC_PTR1]], align 1
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[TMP10]], 10
 ; CHECK-NEXT:    [[MUL1:%.*]] = mul i8 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = mul i8 [[TMP10]], [[TMP12]]
-; CHECK-NEXT:    [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 1
-; CHECK-NEXT:    store i8 [[MUL]], i8* [[Z_ADDR_049]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 2
-; CHECK-NEXT:    store i8 [[MUL1]], i8* [[INCDEC_PTR32]], align 1
-; CHECK-NEXT:    [[INCDEC_PTR34]] = getelementptr inbounds i8, i8* [[Z_ADDR_049]], i32 3
-; CHECK-NEXT:    store i8 [[MUL2]], i8* [[INCDEC_PTR33]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 1
+; CHECK-NEXT:    store i8 [[MUL]], ptr [[Z_ADDR_049]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 2
+; CHECK-NEXT:    store i8 [[MUL1]], ptr [[INCDEC_PTR32]], align 1
+; CHECK-NEXT:    [[INCDEC_PTR34]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 3
+; CHECK-NEXT:    store i8 [[MUL2]], ptr [[INCDEC_PTR33]], align 1
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_048]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
@@ -957,15 +944,15 @@ entry:
   br label %for.body
 
 for.body:
-  %x.addr.050 = phi i8* [ %incdec.ptr2, %for.body ], [ %x, %entry ]
-  %z.addr.049 = phi i8* [ %incdec.ptr34, %for.body ], [ %z, %entry ]
+  %x.addr.050 = phi ptr [ %incdec.ptr2, %for.body ], [ %x, %entry ]
+  %z.addr.049 = phi ptr [ %incdec.ptr34, %for.body ], [ %z, %entry ]
   %i.048 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %x.addr.050, i32 1
-  %0 = load i8, i8* %x.addr.050, align 1
-  %incdec.ptr1 = getelementptr inbounds i8, i8* %x.addr.050, i32 2
-  %1 = load i8, i8* %incdec.ptr, align 1
-  %incdec.ptr2 = getelementptr inbounds i8, i8* %x.addr.050, i32 3
-  %2 = load i8, i8* %incdec.ptr1, align 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %x.addr.050, i32 1
+  %0 = load i8, ptr %x.addr.050, align 1
+  %incdec.ptr1 = getelementptr inbounds i8, ptr %x.addr.050, i32 2
+  %1 = load i8, ptr %incdec.ptr, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, ptr %x.addr.050, i32 3
+  %2 = load i8, ptr %incdec.ptr1, align 1
   %conv = zext i8 %0 to i32
   %mul = mul nuw nsw i32 %conv, 10
   %conv1 = zext i8 %1 to i32
@@ -975,12 +962,12 @@ for.body:
   %conv3 = trunc i32 %mul to i8
   %conv4 = trunc i32 %mul1 to i8
   %conv5 = trunc i32 %mul2 to i8
-  %incdec.ptr32 = getelementptr inbounds i8, i8* %z.addr.049, i32 1
-  store i8 %conv3, i8* %z.addr.049, align 1
-  %incdec.ptr33 = getelementptr inbounds i8, i8* %z.addr.049, i32 2
-  store i8 %conv4, i8* %incdec.ptr32, align 1
-  %incdec.ptr34 = getelementptr inbounds i8, i8* %z.addr.049, i32 3
-  store i8 %conv5, i8* %incdec.ptr33, align 1
+  %incdec.ptr32 = getelementptr inbounds i8, ptr %z.addr.049, i32 1
+  store i8 %conv3, ptr %z.addr.049, align 1
+  %incdec.ptr33 = getelementptr inbounds i8, ptr %z.addr.049, i32 2
+  store i8 %conv4, ptr %incdec.ptr32, align 1
+  %incdec.ptr34 = getelementptr inbounds i8, ptr %z.addr.049, i32 3
+  store i8 %conv5, ptr %incdec.ptr33, align 1
   %inc = add nuw i32 %i.048, 1
   %exitcond = icmp eq i32 %inc, 1000
   br i1 %exitcond, label %end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll
index 571d649103143..82ac429720483 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
 ; Test that ARMTTIImpl::preferPredicateOverEpilogue triggers tail-folding.
 
-define dso_local void @f1(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) {
+define dso_local void @f1(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) {
 ; CHECK-LABEL: f1(
 ; CHECK:       entry:
 ; CHECK:       @llvm.get.active.lane.mask
@@ -25,19 +25,19 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.09
-  %0 = load i32, i32* %arrayidx, align 4
-  %arrayidx1 = getelementptr inbounds i32, i32* %C, i32 %i.09
-  %1 = load i32, i32* %arrayidx1, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
+  %1 = load i32, ptr %arrayidx1, align 4
   %add = add nsw i32 %1, %0
-  %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.09
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
+  store i32 %add, ptr %arrayidx2, align 4
   %inc = add nuw nsw i32 %i.09, 1
   %exitcond.not = icmp eq i32 %inc, %N
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-define dso_local void @f32_reduction(float* nocapture readonly %Input, i32 %N, float* nocapture %Output) {
+define dso_local void @f32_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
 ; CHECK-LABEL: f32_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -52,9 +52,9 @@ while.body.preheader:                             ; preds = %entry
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
   %sum.08 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
-  %Input.addr.07 = phi float* [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds float, float* %Input.addr.07, i32 1
-  %0 = load float, float* %Input.addr.07, align 4
+  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds float, ptr %Input.addr.07, i32 1
+  %0 = load float, ptr %Input.addr.07, align 4
   %add = fadd fast float %0, %sum.08
   %dec = add i32 %blkCnt.09, -1
   %cmp = icmp eq i32 %dec, 0
@@ -68,11 +68,11 @@ while.end:                                        ; preds = %while.end.loopexit,
   %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
   %conv = uitofp i32 %N to float
   %div = fdiv fast float %sum.0.lcssa, %conv
-  store float %div, float* %Output, align 4
+  store float %div, ptr %Output, align 4
   ret void
 }
 
-define dso_local void @f16_reduction(half* nocapture readonly %Input, i32 %N, half* nocapture %Output) {
+define dso_local void @f16_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
 ; CHECK-LABEL: f16_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -87,9 +87,9 @@ while.body.preheader:                             ; preds = %entry
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
   %sum.08 = phi half [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
-  %Input.addr.07 = phi half* [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds half, half* %Input.addr.07, i32 1
-  %0 = load half, half* %Input.addr.07, align 2
+  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds half, ptr %Input.addr.07, i32 1
+  %0 = load half, ptr %Input.addr.07, align 2
   %add = fadd fast half %0, %sum.08
   %dec = add i32 %blkCnt.09, -1
   %cmp = icmp eq i32 %dec, 0
@@ -103,11 +103,11 @@ while.end:                                        ; preds = %while.end.loopexit,
   %sum.0.lcssa = phi half [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
   %conv = uitofp i32 %N to half
   %div = fdiv fast half %sum.0.lcssa, %conv
-  store half %div, half* %Output, align 2
+  store half %div, ptr %Output, align 2
   ret void
 }
 
-define dso_local void @mixed_f32_i32_reduction(float* nocapture readonly %fInput, i32* nocapture readonly %iInput, i32 %N, float* nocapture %fOutput, i32* nocapture %iOutput) {
+define dso_local void @mixed_f32_i32_reduction(ptr nocapture readonly %fInput, ptr nocapture readonly %iInput, i32 %N, ptr nocapture %fOutput, ptr nocapture %iOutput) {
 ; CHECK-LABEL: mixed_f32_i32_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -123,13 +123,13 @@ while.body:
   %blkCnt.020 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
   %isum.019 = phi i32 [ %add2, %while.body ], [ 0, %while.body.preheader ]
   %fsum.018 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
-  %fInput.addr.017 = phi float* [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ]
-  %iInput.addr.016 = phi i32* [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds float, float* %fInput.addr.017, i32 1
-  %incdec.ptr1 = getelementptr inbounds i32, i32* %iInput.addr.016, i32 1
-  %0 = load i32, i32* %iInput.addr.016, align 4
+  %fInput.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ]
+  %iInput.addr.016 = phi ptr [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds float, ptr %fInput.addr.017, i32 1
+  %incdec.ptr1 = getelementptr inbounds i32, ptr %iInput.addr.016, i32 1
+  %0 = load i32, ptr %iInput.addr.016, align 4
   %add2 = add nsw i32 %0, %isum.019
-  %1 = load float, float* %fInput.addr.017, align 4
+  %1 = load float, ptr %fInput.addr.017, align 4
   %add = fadd fast float %1, %fsum.018
   %dec = add i32 %blkCnt.020, -1
   %cmp = icmp eq i32 %dec, 0
@@ -146,14 +146,14 @@ while.end:
   %isum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %phitmp, %while.end.loopexit ]
   %conv = uitofp i32 %N to float
   %div = fdiv fast float %fsum.0.lcssa, %conv
-  store float %div, float* %fOutput, align 4
+  store float %div, ptr %fOutput, align 4
   %div5 = fdiv fast float %isum.0.lcssa, %conv
   %conv6 = fptosi float %div5 to i32
-  store i32 %conv6, i32* %iOutput, align 4
+  store i32 %conv6, ptr %iOutput, align 4
   ret void
 }
 
-define dso_local i32 @i32_mul_reduction(i32* noalias nocapture readonly %B, i32 %N) {
+define dso_local i32 @i32_mul_reduction(ptr noalias nocapture readonly %B, i32 %N) {
 ; CHECK-LABEL: i32_mul_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -176,15 +176,15 @@ for.cond.cleanup:
 for.body:
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %S.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
   %mul = mul nsw i32 %0, %S.07
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-define dso_local i32 @i32_or_reduction(i32* noalias nocapture readonly %B, i32 %N) {
+define dso_local i32 @i32_or_reduction(ptr noalias nocapture readonly %B, i32 %N) {
 ; CHECK-LABEL: i32_or_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -207,15 +207,15 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %S.07 = phi i32 [ %or, %for.body ], [ 1, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.08
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
+  %0 = load i32, ptr %arrayidx, align 4
   %or = or i32 %0, %S.07
   %inc = add nuw nsw i32 %i.08, 1
   %exitcond = icmp eq i32 %inc, %N
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
 
-define dso_local i32 @i32_and_reduction(i32* noalias nocapture readonly %A, i32 %N, i32 %S) {
+define dso_local i32 @i32_and_reduction(ptr noalias nocapture readonly %A, i32 %N, i32 %S) {
 ; CHECK-LABEL: i32_and_reduction(
 ; CHECK:       vector.body:
 ; CHECK:       @llvm.masked.load
@@ -238,8 +238,8 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %S.addr.06 = phi i32 [ %and, %for.body ], [ %S, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.07
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.07
+  %0 = load i32, ptr %arrayidx, align 4
   %and = and i32 %0, %S.addr.06
   %inc = add nuw nsw i32 %i.07, 1
   %exitcond = icmp eq i32 %inc, %N

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
index ee9c903d46bda..571d93a217de8 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-scalar-epilogue-fallback.ll
@@ -9,68 +9,66 @@
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabihf"
 
-define void @outside_user_blocks_tail_folding(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
+define void @outside_user_blocks_tail_folding(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
 ; CHECK-LABEL: @outside_user_blocks_tail_folding(
 ; CHECK-NEXT:  header:
-; CHECK-NEXT:    [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
+; CHECK-NEXT:    [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 16
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 16
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[WIDE_LOAD]], <16 x i8>* [[TMP5]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
 ; CHECK-NEXT:    [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
+; CHECK-NEXT:    [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[DEC66]], -1
-; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    store i8 [[TMP7]], i8* [[BUFF]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    store i8 [[TMP5]], ptr [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4
 ; CHECK-NEXT:    ret void
 ;
 header:
-  %ptr0 = load i8*, i8** %pos, align 4
+  %ptr0 = load ptr, ptr %pos, align 4
   br label %body
 
 body:
   %dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
-  %buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
+  %buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1
   %dec = add nsw i32 %dec66, -1
-  %0 = load i8, i8* %incdec.ptr, align 1
-  store i8 %0, i8* %buff, align 1
+  %0 = load i8, ptr %incdec.ptr, align 1
+  store i8 %0, ptr %buff, align 1
   %tobool11 = icmp eq i32 %dec, 0
   br i1 %tobool11, label %end, label %body
 
 end:
-  store i8* %incdec.ptr, i8** %pos, align 4
+  store ptr %incdec.ptr, ptr %pos, align 4
   ret void
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll b/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll
index aededbe7c3e91..ebdf0a1f78120 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/vector_cast.ll
@@ -7,21 +7,20 @@ target triple = "armv7--linux-gnueabi"
 ; for the stores to the struct. Here we need to perform a bitcast from a vector
 ; of pointers to a vector i32s.
 
-%class.A = type { i8*, i32 }
+%class.A = type { ptr, i32 }
 
 ; CHECK-LABEL: test0
-define void @test0(%class.A* %StartPtr, %class.A* %APtr) {
+define void @test0(ptr %StartPtr, ptr %APtr) {
 entry:
   br label %for.body.i
 
 for.body.i:
-  %addr = phi %class.A* [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ]
-  %Data.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 0
-  store i8* null, i8** %Data.i.i, align 4, !tbaa !8
-  %Length.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 1
-  store i32 0, i32* %Length.i.i, align 4, !tbaa !11
-  %incdec.ptr.i = getelementptr inbounds %class.A, %class.A* %addr, i32 1
-  %cmp.i = icmp eq %class.A* %incdec.ptr.i, %APtr
+  %addr = phi ptr [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ]
+  store ptr null, ptr %addr, align 4, !tbaa !8
+  %Length.i.i = getelementptr inbounds %class.A, ptr %addr, i32 0, i32 1
+  store i32 0, ptr %Length.i.i, align 4, !tbaa !11
+  %incdec.ptr.i = getelementptr inbounds %class.A, ptr %addr, i32 1
+  %cmp.i = icmp eq ptr %incdec.ptr.i, %APtr
   br i1 %cmp.i, label %exit, label %for.body.i
 
 exit:

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
index d0908cc28b67d..3d6e0743df172 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/pr30990.ll
@@ -2,10 +2,10 @@
 
 target triple = "powerpc64-unknown-linux-gnu"
 
-define signext i32 @foo(i8* readonly %ptr, i32 signext %l) {
+define signext i32 @foo(ptr readonly %ptr, i32 signext %l) {
 entry:
   %idx.ext = sext i32 %l to i64
-  %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 %idx.ext
   %cmp7 = icmp sgt i32 %l, 0
   br i1 %cmp7, label %while.body.preheader, label %while.end
 
@@ -14,13 +14,13 @@ while.body.preheader:                             ; preds = %entry
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
-  %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
-  %0 = load i8, i8* %ptr.addr.08, align 1
+  %ptr.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
+  %0 = load i8, ptr %ptr.addr.08, align 1
   %cmp1 = icmp slt i8 %0, -64
   %cond = zext i1 %cmp1 to i32
   %add = add nsw i32 %cond, %count.09
-  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1
-  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
+  %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.08, i64 1
+  %cmp = icmp ult ptr %incdec.ptr, %add.ptr
   br i1 %cmp, label %while.body, label %while.end.loopexit
 
 while.end.loopexit:                               ; preds = %while.body
@@ -36,10 +36,10 @@ while.end:                                        ; preds = %while.end.loopexit,
 }
 
 
-define signext i16 @foo2(i8* readonly %ptr, i32 signext %l) {
+define signext i16 @foo2(ptr readonly %ptr, i32 signext %l) {
 entry:
   %idx.ext = sext i32 %l to i64
-  %add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i8, ptr %ptr, i64 %idx.ext
   %cmp7 = icmp sgt i32 %l, 0
   br i1 %cmp7, label %while.body.preheader, label %while.end
 
@@ -48,13 +48,13 @@ while.body.preheader:                             ; preds = %entry
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %count.09 = phi i16 [ %add, %while.body ], [ 0, %while.body.preheader ]
-  %ptr.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
-  %0 = load i8, i8* %ptr.addr.08, align 1
+  %ptr.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
+  %0 = load i8, ptr %ptr.addr.08, align 1
   %cmp1 = icmp slt i8 %0, -64
   %cond = zext i1 %cmp1 to i16
   %add = add nsw i16 %cond, %count.09
-  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.08, i64 1
-  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
+  %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.08, i64 1
+  %cmp = icmp ult ptr %incdec.ptr, %add.ptr
   br i1 %cmp, label %while.body, label %while.end.loopexit
 
 while.end.loopexit:                               ; preds = %while.body
@@ -70,10 +70,10 @@ while.end:                                        ; preds = %while.end.loopexit,
 ; CHECK: icmp slt <8 x i8>
 }
 
-define signext i32 @foo3(i16* readonly %ptr, i32 signext %l) {
+define signext i32 @foo3(ptr readonly %ptr, i32 signext %l) {
 entry:
   %idx.ext = sext i32 %l to i64
-  %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i16, ptr %ptr, i64 %idx.ext
   %cmp7 = icmp sgt i32 %l, 0
   br i1 %cmp7, label %while.body.preheader, label %while.end
 
@@ -82,13 +82,13 @@ while.body.preheader:                             ; preds = %entry
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %count.09 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
-  %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
-  %0 = load i16, i16* %ptr.addr.16, align 1
+  %ptr.addr.16 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
+  %0 = load i16, ptr %ptr.addr.16, align 1
   %cmp1 = icmp slt i16 %0, -64
   %cond = zext i1 %cmp1 to i32
   %add = add nsw i32 %cond, %count.09
-  %incdec.ptr = getelementptr inbounds i16, i16* %ptr.addr.16, i64 1
-  %cmp = icmp ult i16* %incdec.ptr, %add.ptr
+  %incdec.ptr = getelementptr inbounds i16, ptr %ptr.addr.16, i64 1
+  %cmp = icmp ult ptr %incdec.ptr, %add.ptr
   br i1 %cmp, label %while.body, label %while.end.loopexit
 
 while.end.loopexit:                               ; preds = %while.body
@@ -104,10 +104,10 @@ while.end:                                        ; preds = %while.end.loopexit,
 ; CHECK: icmp slt <4 x i16>
 }
 
-define i64 @foo4(i16* readonly %ptr, i32 signext %l) {
+define i64 @foo4(ptr readonly %ptr, i32 signext %l) {
 entry:
   %idx.ext = sext i32 %l to i64
-  %add.ptr = getelementptr inbounds i16, i16* %ptr, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i16, ptr %ptr, i64 %idx.ext
   %cmp7 = icmp sgt i32 %l, 0
   br i1 %cmp7, label %while.body.preheader, label %while.end
 
@@ -116,13 +116,13 @@ while.body.preheader:                             ; preds = %entry
 
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %count.09 = phi i64 [ %add, %while.body ], [ 0, %while.body.preheader ]
-  %ptr.addr.16 = phi i16* [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
-  %0 = load i16, i16* %ptr.addr.16, align 1
+  %ptr.addr.16 = phi ptr [ %incdec.ptr, %while.body ], [ %ptr, %while.body.preheader ]
+  %0 = load i16, ptr %ptr.addr.16, align 1
   %cmp1 = icmp slt i16 %0, -64
   %cond = zext i1 %cmp1 to i64
   %add = add nsw i64 %cond, %count.09
-  %incdec.ptr = getelementptr inbounds i16, i16* %ptr.addr.16, i64 1
-  %cmp = icmp ult i16* %incdec.ptr, %add.ptr
+  %incdec.ptr = getelementptr inbounds i16, ptr %ptr.addr.16, i64 1
+  %cmp = icmp ult ptr %incdec.ptr, %add.ptr
   br i1 %cmp, label %while.body, label %while.end.loopexit
 
 while.end.loopexit:                               ; preds = %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
index e3285e67af2b7..b63f2cfa8758f 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
@@ -23,11 +23,11 @@ for.cond.cleanup:
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %indvars.iv
+  %0 = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %0 to i32
-  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
-  %1 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %indvars.iv
+  %1 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %1 to i32
   %sub = sub nsw i32 %conv, %conv3
   %ispos = icmp sgt i32 %sub, -1
@@ -59,12 +59,12 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %tmp1 = add nsw i64 %indvars.iv, 3
-  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
-  %tmp = load i8, i8* %arrayidx, align 1
+  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %tmp1
+  %tmp = load i8, ptr %arrayidx, align 1
   %conv = zext i8 %tmp to i32
   %tmp2 = add nsw i64 %indvars.iv, 2
-  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
-  %tmp3 = load i8, i8* %arrayidx2, align 1
+  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %tmp2
+  %tmp3 = load i8, ptr %arrayidx2, align 1
   %conv3 = zext i8 %tmp3 to i32
   %sub = sub nsw i32 %conv, %conv3
   %ispos = icmp sgt i32 %sub, -1
@@ -76,7 +76,7 @@ for.body:                                         ; preds = %for.body, %entry
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define i64 @bar(i64* nocapture %a) {
+define i64 @bar(ptr nocapture %a) {
 ; CHECK-LABEL: bar
 
 ; CHECK: Executing best plan with VF=2, UF=12
@@ -91,10 +91,10 @@ for.cond.cleanup:
 for.body:
   %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
-  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
-  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.012
+  %0 = load i64, ptr %arrayidx, align 8
   %add = add nsw i64 %0, %i.012
-  store i64 %add, i64* %arrayidx, align 8
+  store i64 %add, ptr %arrayidx, align 8
   %add2 = add nsw i64 %add, %s.011
   %inc = add nuw nsw i64 %i.012, 1
   %exitcond = icmp eq i64 %inc, 1024
@@ -114,12 +114,12 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
-  %tmp = load i64, i64* %arrayidx, align 8
-  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
-  %tmp1 = load i32, i32* %arrayidx1, align 4
-  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
-  store i32 %tmp1, i32* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds [0 x i64], ptr @d, i64 0, i64 %indvars.iv
+  %tmp = load i64, ptr %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds [0 x i32], ptr @e, i64 0, i64 %tmp
+  %tmp1 = load i32, ptr %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds [0 x i32], ptr @c, i64 0, i64 %indvars.iv
+  store i32 %tmp1, ptr %arrayidx3, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 10000
   br i1 %exitcond, label %for.end, label %for.body
@@ -128,7 +128,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
+define float @float_(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
 ;CHECK-LABEL: float_
 ;CHECK: LV(REG): VF = 1
 ;CHECK: LV(REG): Found max usage: 2 item
@@ -148,10 +148,10 @@ preheader:
 for:
   %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
   %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
-  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %t1 = load float, float* %arrayidx, align 4
-  %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %t2 = load float, float* %arrayidx3, align 4
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %t1 = load float, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %t2 = load float, ptr %arrayidx3, align 4
   %add = fadd fast float %t1, %s.02
   %add4 = fadd fast float %add, %t2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
@@ -168,7 +168,7 @@ for.end:
 }
 
 
-define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
+define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
 ;CHECK-LABEL: double_
 ;CHECK-PWR8: LV(REG): VF = 2
 ;CHECK-PWR8: LV(REG): Found max usage: 2 item
@@ -189,8 +189,8 @@ define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
 
 ; <label>:2                                       ; preds = %2, %0
   %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
-  %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
-  %4 = load double, double* %3, align 8
+  %3 = getelementptr inbounds double, ptr %A, i64 %indvars.iv
+  %4 = load double, ptr %3, align 8
   %5 = fadd double %4, 3.000000e+00
   %6 = fmul double %4, 2.000000e+00
   %7 = fadd double %5, %6
@@ -210,7 +210,7 @@ define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
   %21 = fadd double %20, %17
   %22 = fadd double %21, 3.000000e+00
   %23 = fmul double %4, %22
-  store double %23, double* %3, align 8
+  store double %23, ptr %3, align 8
   %indvars.iv.next = add i64 %indvars.iv, -1
   %24 = trunc i64 %indvars.iv to i32
   %25 = icmp eq i32 %24, 0
@@ -220,7 +220,7 @@ define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
   ret void
 }
 
-define ppc_fp128 @fp128_(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {
+define ppc_fp128 @fp128_(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
 ;CHECK-LABEL: fp128_
 ;CHECK: LV(REG): VF = 1
 ;CHECK: LV(REG): Found max usage: 2 item
@@ -232,8 +232,8 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
-  %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
-  %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds ppc_fp128, ptr %n, i32 %i.06
+  %0 = load ppc_fp128, ptr %arrayidx, align 8
   %sub = fsub fast ppc_fp128 %x.05, %0
   %inc = add nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, 2048
@@ -244,7 +244,7 @@ for.end:                                          ; preds = %for.body
 }
 
 
-define void @fp16_(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
+define void @fp16_(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
 ;CHECK-LABEL: fp16_
 ;CHECK: LV(REG): VF = 1
 ;CHECK: LV(REG): Found max usage: 2 item
@@ -259,19 +259,19 @@ entry:
   br i1 %cmp26, label %while.end, label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
-  %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
-  %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
+  %pIn.addr.029 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ]
+  %pOut.addr.028 = phi ptr [ %add.ptr7, %while.body ], [ %pOut, %entry ]
   %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
-  %1 = load half, half* %pIn.addr.029, align 2
-  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
-  %2 = load half, half* %arrayidx2, align 2
+  %1 = load half, ptr %pIn.addr.029, align 2
+  %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.029, i32 1
+  %2 = load half, ptr %arrayidx2, align 2
   %mul3 = fmul half %1, %0
   %mul4 = fmul half %2, %0
-  store half %mul3, half* %pOut.addr.028, align 2
-  %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
-  store half %mul4, half* %arrayidx6, align 2
-  %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
-  %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
+  store half %mul3, ptr %pOut.addr.028, align 2
+  %arrayidx6 = getelementptr inbounds half, ptr %pOut.addr.028, i32 1
+  store half %mul4, ptr %arrayidx6, align 2
+  %add.ptr = getelementptr inbounds half, ptr %pIn.addr.029, i32 2
+  %add.ptr7 = getelementptr inbounds half, ptr %pOut.addr.028, i32 2
   %dec = add nsw i32 %blkCnt.027, -1
   %cmp = icmp eq i32 %dec, 0
   br i1 %cmp, label %while.end, label %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
index ccd43775174f1..f7aa8e0aa12a7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -3,16 +3,16 @@
 target triple = "riscv64-linux-gnu"
 
 ;
-define dso_local void @loop_i128(i128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_i128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_i128(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, i128* [[PTR:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i128, i128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, ptr [[PTR:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i128 [[TMP0]], 42
-; CHECK-NEXT:    store i128 [[ADD]], i128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    store i128 [[ADD]], ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -24,10 +24,10 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  %0 = load i128, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  %0 = load i128, ptr %arrayidx, align 16
   %add = add nsw i128 %0, 42
-  store i128 %add, i128* %arrayidx, align 16
+  store i128 %add, ptr %arrayidx, align 16
   %iv.next = add i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -36,16 +36,16 @@ for.end:
   ret void
 }
 
-define dso_local void @loop_f128(fp128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_f128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_f128(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds fp128, fp128* [[PTR:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load fp128, fp128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds fp128, ptr [[PTR:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load fp128, ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[ADD:%.*]] = fsub fp128 [[TMP0]], 0xL00000000000000008000000000000000
-; CHECK-NEXT:    store fp128 [[ADD]], fp128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    store fp128 [[ADD]], ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0]]
@@ -57,10 +57,10 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
-  %0 = load fp128, fp128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds fp128, ptr %ptr, i64 %iv
+  %0 = load fp128, ptr %arrayidx, align 16
   %add = fsub fp128 %0, 0xL00000000000000008000000000000000
-  store fp128 %add, fp128* %arrayidx, align 16
+  store fp128 %add, ptr %arrayidx, align 16
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -69,14 +69,14 @@ for.end:
   ret void
 }
 
-define dso_local void @loop_invariant_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
+define dso_local void @loop_invariant_i128(ptr nocapture %ptr, i128 %val, i64 %N) {
 ; CHECK-LABEL: @loop_invariant_i128(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, i128* [[PTR:%.*]], i64 [[IV]]
-; CHECK-NEXT:    store i128 [[VAL:%.*]], i128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, ptr [[PTR:%.*]], i64 [[IV]]
+; CHECK-NEXT:    store i128 [[VAL:%.*]], ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0]]
@@ -88,8 +88,8 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  store i128 %val, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  store i128 %val, ptr %arrayidx, align 16
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -98,7 +98,7 @@ for.end:
   ret void
 }
 
-define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
+define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
@@ -107,49 +107,48 @@ define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i64, i64* [[START:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT]], <2 x i64*> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64*> poison, i64* [[START]], i32 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT3]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], 8
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i64* [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 0, i64 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, i64* [[POINTER_PHI]], <2 x i64> <i64 2, i64 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64*> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, i64* [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, i64* [[TMP3]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP1]], i64 1
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, <2 x i64*> [[TMP2]], i64 1
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <2 x i64*> [[TMP8]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <2 x i64*> [[TMP9]], [[BROADCAST_SPLAT4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
-; CHECK-NEXT:    store i1 [[TMP12]], i1* [[DST:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 0, i64 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> <i64 16, i64 24>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 2
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, <2 x ptr> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, <2 x ptr> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq <2 x ptr> [[TMP7]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq <2 x ptr> [[TMP8]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT:    store i1 [[TMP11]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i64, i64* [[POINTER_PHI]], i64 4
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi i64* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[FIRST_SROA:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT:    [[TMP14:%.*]] = load i64, i64* [[FIRST_SROA]], align 4
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i64, i64* [[FIRST_SROA]], i64 1
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64* [[INCDEC_PTR]], [[START]]
-; CHECK-NEXT:    store i1 [[CMP_NOT]], i1* [[DST]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i64, ptr [[FIRST_SROA]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i64, ptr [[FIRST_SROA]], i64 1
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[START]]
+; CHECK-NEXT:    store i1 [[CMP_NOT]], ptr [[DST]], align 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IV]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       end:
@@ -159,13 +158,13 @@ entry:
   br label %for.body
 
 for.body:
-  %first.sroa = phi i64* [ %incdec.ptr, %for.body ], [ %start, %entry ]
+  %first.sroa = phi ptr [ %incdec.ptr, %for.body ], [ %start, %entry ]
   %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
   %iv.next = add i64 %iv, 1
-  %0 = load i64, i64* %first.sroa
-  %incdec.ptr = getelementptr inbounds i64, i64* %first.sroa, i64 1
-  %cmp.not = icmp eq i64* %incdec.ptr, %start
-  store i1 %cmp.not, i1* %dst
+  %0 = load i64, ptr %first.sroa
+  %incdec.ptr = getelementptr inbounds i64, ptr %first.sroa, i64 1
+  %cmp.not = icmp eq ptr %incdec.ptr, %start
+  store i1 %cmp.not, ptr %dst
   %cmp = icmp ult i64 %iv, %N
   br i1 %cmp, label %for.body, label %end, !llvm.loop !0
 
@@ -173,16 +172,16 @@ end:
   ret void
 }
 
-define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
+define dso_local void @loop_fixed_width_i128(ptr nocapture %ptr, i64 %N) {
 ; CHECK-LABEL: @loop_fixed_width_i128(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, i128* [[PTR:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i128, i128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i128, ptr [[PTR:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i128 [[TMP0]], 42
-; CHECK-NEXT:    store i128 [[ADD]], i128* [[ARRAYIDX]], align 16
+; CHECK-NEXT:    store i128 [[ADD]], ptr [[ARRAYIDX]], align 16
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -194,10 +193,10 @@ entry:
 
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
-  %0 = load i128, i128* %arrayidx, align 16
+  %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %iv
+  %0 = load i128, ptr %arrayidx, align 16
   %add = add nsw i128 %0, 42
-  store i128 %add, i128* %arrayidx, align 16
+  store i128 %add, ptr %arrayidx, align 16
   %iv.next = add i64 %iv, 1
   %exitcond.not = icmp eq i64 %iv.next, %N
   br i1 %exitcond.not, label %for.end, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index e11cd6c6fdd88..fddc21ebff6d5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -20,7 +20,7 @@
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
 
-define void @add(float* noalias nocapture readonly %src1, float* noalias nocapture readonly %src2, i32 signext %size, float* noalias nocapture writeonly %result) {
+define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
 ; CHECK-LABEL: add
 ; CHECK-SCALAR:      LV(REG): Found max usage: 2 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
@@ -58,19 +58,19 @@ for.cond.cleanup:
 
 for.body:
   %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src1, i64 %i.011
-  %0 = load float, float* %arrayidx, align 4
-  %arrayidx2 = getelementptr inbounds float, float* %src2, i64 %i.011
-  %1 = load float, float* %arrayidx2, align 4
+  %arrayidx = getelementptr inbounds float, ptr %src1, i64 %i.011
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, ptr %src2, i64 %i.011
+  %1 = load float, ptr %arrayidx2, align 4
   %add = fadd float %0, %1
-  %arrayidx3 = getelementptr inbounds float, float* %result, i64 %i.011
-  store float %add, float* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds float, ptr %result, i64 %i.011
+  store float %add, ptr %arrayidx3, align 4
   %add4 = add nuw nsw i64 %i.011, 1
   %exitcond.not = icmp eq i64 %add4, %conv
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-define void @goo(i32** nocapture noundef %a, i32 noundef signext %n) {
+define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-LABEL: goo
 ; CHECK-SCALAR:      LV(REG): Found max usage: 1 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
@@ -102,10 +102,10 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32*, i32** %a, i64 %indvars.iv
-  %0 = load i32*, i32** %arrayidx, align 8
-  %add.ptr = getelementptr inbounds i32, i32* %0, i64 1
-  store i32* %add.ptr, i32** %arrayidx, align 8
+  %arrayidx = getelementptr inbounds ptr, ptr %a, i64 %indvars.iv
+  %0 = load ptr, ptr %arrayidx, align 8
+  %add.ptr = getelementptr inbounds i32, ptr %0, i64 1
+  store ptr %add.ptr, ptr %arrayidx, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr38110.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr38110.ll
index 6c8fef9fc91c6..7684cf82ef1ad 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr38110.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr38110.ll
@@ -13,7 +13,7 @@
 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"
 
-define void @test(i32 zeroext %width, i8* nocapture %row, i16 zeroext %src, i16* nocapture readonly %dst) {
+define void @test(i32 zeroext %width, ptr nocapture %row, i16 zeroext %src, ptr nocapture readonly %dst) {
 entry:
   %cmp10 = icmp eq i32 %width, 0
   br i1 %cmp10, label %for.end, label %for.body.lr.ph
@@ -24,21 +24,21 @@ for.body.lr.ph:                                   ; preds = %entry
 
 for.body:                                         ; preds = %for.inc, %for.body.lr.ph
   %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-  %sp.011 = phi i8* [ %row, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
-  %0 = load i8, i8* %sp.011, align 1
+  %sp.011 = phi ptr [ %row, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
+  %0 = load i8, ptr %sp.011, align 1
   %conv = zext i8 %0 to i32
   %cmp2 = icmp eq i32 %conv, %conv1
   br i1 %cmp2, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i16, i16* %dst, align 2
+  %1 = load i16, ptr %dst, align 2
   %conv4 = trunc i16 %1 to i8
-  store i8 %conv4, i8* %sp.011, align 1
+  store i8 %conv4, ptr %sp.011, align 1
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body, %if.then
   %inc = add nuw i32 %i.012, 1
-  %incdec.ptr = getelementptr inbounds i8, i8* %sp.011, i64 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %sp.011, i64 1
   %exitcond = icmp eq i32 %inc, %width
   br i1 %exitcond, label %for.end.loopexit, label %for.body
 

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
index 10db12e0905c9..b5fe50ce9380e 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -26,8 +26,8 @@ define void @func_21() {
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -35,8 +35,8 @@ define void @func_21() {
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; CHECK:       pred.load.if1:
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP10]], i32 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -45,17 +45,17 @@ define void @func_21() {
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
-; CHECK-NEXT:    store i32 [[TMP16]], i32* [[TMP15]], align 4
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP15]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.if3:
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
-; CHECK-NEXT:    store i32 [[TMP19]], i32* [[TMP18]], align 4
+; CHECK-NEXT:    store i32 [[TMP19]], ptr [[TMP18]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
@@ -73,10 +73,10 @@ define void @func_21() {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LV:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[A_PTR:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LV]] = load i32, i32* [[A_PTR]], align 4
-; CHECK-NEXT:    [[B_PTR:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4
+; CHECK-NEXT:    [[A_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LV]] = load i32, ptr [[A_PTR]], align 4
+; CHECK-NEXT:    [[B_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
@@ -89,10 +89,10 @@ entry:
 loop:                                    ; preds = %loop, %entry
   %rec = phi i32 [ 0, %entry], [ %lv, %loop ]
   %indvars.iv = phi i64 [ 0, %entry], [ %indvars.iv.next, %loop ]
-  %A.ptr= getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 %indvars.iv
-  %lv = load i32, i32* %A.ptr, align 4
-  %B.ptr = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 %indvars.iv
-  store i32 %rec, i32* %B.ptr, align 4
+  %A.ptr= getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 %indvars.iv
+  %lv = load i32, ptr %A.ptr, align 4
+  %B.ptr = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 %indvars.iv
+  store i32 %rec, ptr %B.ptr, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 5
   br i1 %exitcond, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index c60f3ed523865..41fe13a9e0a49 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-pc_linux"
 
 ; The source code:
 ;
-;void foo1(float * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger, int * __restrict__ index) {
+;void foo1(float * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger, int * __restrict__ index) {
 ;
 ;  for (int i=0; i < SIZE; ++i) {
 ;    if (trigger[i] > 0) {
@@ -21,51 +21,48 @@ target triple = "x86_64-pc_linux"
 ;}
 
 ; Function Attrs: nounwind uwtable
-define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr noalias %index) {
 ; AVX512-LABEL: @foo1(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX1]], 0
-; AVX512-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
-; AVX512-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>*
-; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 4
-; AVX512-NEXT:    [[TMP4:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD]], zeroinitializer
-; AVX512-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[INDEX:%.*]], i64 [[TMP0]]
-; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[TMP5]], i32 0
-; AVX512-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>*
-; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP7]], i32 4, <16 x i1> [[TMP4]], <16 x i32> poison)
-; AVX512-NEXT:    [[TMP8:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64>
-; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[IN:%.*]], <16 x i64> [[TMP8]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP9]], i32 4, <16 x i1> [[TMP4]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP10:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[OUT:%.*]], i64 [[TMP0]]
-; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0
-; AVX512-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <16 x float>*
-; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP10]], <16 x float>* [[TMP13]], i32 4, <16 x i1> [[TMP4]])
+; AVX512-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4
+; AVX512-NEXT:    [[TMP3:%.*]] = icmp sgt <16 x i32> [[WIDE_LOAD]], zeroinitializer
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[INDEX:%.*]], i64 [[TMP0]]
+; AVX512-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; AVX512-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP5]], i32 4, <16 x i1> [[TMP3]], <16 x i32> poison)
+; AVX512-NEXT:    [[TMP6:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64>
+; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <16 x i64> [[TMP6]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP7]], i32 4, <16 x i1> [[TMP3]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]]
+; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
+; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP8]], ptr [[TMP10]], i32 4, <16 x i1> [[TMP3]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
-; AVX512-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX512-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; AVX512-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
+; AVX512-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; AVX512:       middle.block:
 ; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP15]], 0
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP12]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; AVX512-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP16]] to i64
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[IN]], i64 [[IDXPROM4]]
-; AVX512-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX5]], align 4
-; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP17]], 5.000000e-01
-; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
+; AVX512-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; AVX512-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP13]] to i64
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[IDXPROM4]]
+; AVX512-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP14]], 5.000000e-01
+; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    store float [[ADD]], ptr [[ARRAYIDX7]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -80,44 +77,41 @@ define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger
 ; FVW2:       vector.body:
 ; FVW2-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX1]], 0
-; FVW2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
-; FVW2-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
-; FVW2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
-; FVW2-NEXT:    [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; FVW2-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[INDEX:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[TMP5]], i32 0
-; FVW2-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
-; FVW2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* [[TMP7]], i32 4, <2 x i1> [[TMP4]], <2 x i32> poison)
-; FVW2-NEXT:    [[TMP8:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64>
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[IN:%.*]], <2 x i64> [[TMP8]]
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP9]], i32 4, <2 x i1> [[TMP4]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
-; FVW2-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[OUT:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0
-; FVW2-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <2 x float>*
-; FVW2-NEXT:    call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> [[TMP10]], <2 x float>* [[TMP13]], i32 4, <2 x i1> [[TMP4]])
+; FVW2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; FVW2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; FVW2-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[INDEX:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; FVW2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr [[TMP5]], i32 4, <2 x i1> [[TMP3]], <2 x i32> poison)
+; FVW2-NEXT:    [[TMP6:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64>
+; FVW2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <2 x i64> [[TMP6]]
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP7]], i32 4, <2 x i1> [[TMP3]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
+; FVW2-NEXT:    call void @llvm.masked.store.v2f32.p0(<2 x float> [[TMP8]], ptr [[TMP10]], i32 4, <2 x i1> [[TMP3]])
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
-; FVW2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; FVW2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; FVW2-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
+; FVW2-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; FVW2:       middle.block:
 ; FVW2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP15]], 0
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP12]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[INDEX]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
-; FVW2-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP16]] to i64
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[IN]], i64 [[IDXPROM4]]
-; FVW2-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX5]], align 4
-; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP17]], 5.000000e-01
-; FVW2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
+; FVW2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; FVW2-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP13]] to i64
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[IDXPROM4]]
+; FVW2-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP14]], 5.000000e-01
+; FVW2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    store float [[ADD]], ptr [[ARRAYIDX7]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -131,20 +125,20 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %arrayidx3 = getelementptr inbounds i32, i32* %index, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %index, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx3, align 4
   %idxprom4 = sext i32 %1 to i64
-  %arrayidx5 = getelementptr inbounds float, float* %in, i64 %idxprom4
-  %2 = load float, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %in, i64 %idxprom4
+  %2 = load float, ptr %arrayidx5, align 4
   %add = fadd float %2, 5.000000e-01
-  %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  store float %add, float* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds float, ptr %out, i64 %indvars.iv
+  store float %add, ptr %arrayidx7, align 4
   br label %for.inc
 
 for.inc:
@@ -157,7 +151,7 @@ for.end:
 }
 
 ; The source code
-;void foo2 (In * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger) {
+;void foo2 (In * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger) {
 ;
 ;  for (int i=0; i<SIZE; i += 16) {
 ;    if (trigger[i] > 0) {
@@ -168,21 +162,21 @@ for.end:
 
 %struct.In = type { float, float }
 
-define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
+define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr noalias %index) #0 {
 ; AVX512-LABEL: @foo2(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
+; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
 ; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[OUT:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP3]], <16 x float*> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
 ; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -192,16 +186,16 @@ define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %tr
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP6]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    [[TMP7:%.*]] = load float, float* [[B]], align 4
+; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    [[TMP7:%.*]] = load float, ptr [[B]], align 4
 ; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP7]], 5.000000e-01
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    store float [[ADD]], float* [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    store float [[ADD]], ptr [[ARRAYIDX5]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -219,30 +213,30 @@ define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %tr
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; FVW2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
-; FVW2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; FVW2-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
 ; FVW2-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
 ; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
 ; FVW2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; FVW2-NEXT:    store float [[TMP13]], float* [[TMP12]], align 4
+; FVW2-NEXT:    store float [[TMP13]], ptr [[TMP12]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; FVW2:       pred.store.continue:
 ; FVW2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
 ; FVW2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.if2:
-; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; FVW2-NEXT:    store float [[TMP16]], float* [[TMP15]], align 4
+; FVW2-NEXT:    store float [[TMP16]], ptr [[TMP15]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
@@ -254,16 +248,16 @@ define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %tr
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP18]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    [[TMP19:%.*]] = load float, float* [[B]], align 4
+; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    [[TMP19:%.*]] = load float, ptr [[B]], align 4
 ; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP19]], 5.000000e-01
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    store float [[ADD]], float* [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    store float [[ADD]], ptr [[ARRAYIDX5]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -277,17 +271,17 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %b = getelementptr inbounds %struct.In, %struct.In* %in, i64 %indvars.iv, i32 1
-  %1 = load float, float* %b, align 4
+  %b = getelementptr inbounds %struct.In, ptr %in, i64 %indvars.iv, i32 1
+  %1 = load float, ptr %b, align 4
   %add = fadd float %1, 5.000000e-01
-  %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %out, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   br label %for.inc
 
 for.inc:
@@ -315,21 +309,21 @@ for.end:
 
 %struct.Out = type { float, float }
 
-define void @foo3(%struct.In* noalias %in, %struct.Out* noalias %out, i32* noalias %trigger) {
+define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
 ; AVX512-LABEL: @foo3(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
+; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
 ; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], %struct.Out* [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP3]], <16 x float*> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -339,16 +333,16 @@ define void @foo3(%struct.In* noalias %in, %struct.Out* noalias %out, i32* noali
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP6]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    [[TMP7:%.*]] = load float, float* [[B]], align 4
+; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    [[TMP7:%.*]] = load float, ptr [[B]], align 4
 ; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP7]], 5.000000e-01
-; AVX512-NEXT:    [[B6:%.*]] = getelementptr inbounds [[STRUCT_OUT]], %struct.Out* [[OUT]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    store float [[ADD]], float* [[B6]], align 4
+; AVX512-NEXT:    [[B6:%.*]] = getelementptr inbounds [[STRUCT_OUT]], ptr [[OUT]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    store float [[ADD]], ptr [[B6]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -366,30 +360,30 @@ define void @foo3(%struct.In* noalias %in, %struct.Out* noalias %out, i32* noali
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; FVW2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
-; FVW2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; FVW2-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
 ; FVW2-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
 ; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], %struct.Out* [[OUT:%.*]], i64 [[TMP0]], i32 1
+; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[TMP0]], i32 1
 ; FVW2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; FVW2-NEXT:    store float [[TMP13]], float* [[TMP12]], align 4
+; FVW2-NEXT:    store float [[TMP13]], ptr [[TMP12]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; FVW2:       pred.store.continue:
 ; FVW2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
 ; FVW2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
 ; FVW2:       pred.store.if1:
-; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_OUT]], %struct.Out* [[OUT]], i64 [[TMP1]], i32 1
+; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_OUT]], ptr [[OUT]], i64 [[TMP1]], i32 1
 ; FVW2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; FVW2-NEXT:    store float [[TMP16]], float* [[TMP15]], align 4
+; FVW2-NEXT:    store float [[TMP16]], ptr [[TMP15]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; FVW2:       pred.store.continue2:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -401,16 +395,16 @@ define void @foo3(%struct.In* noalias %in, %struct.Out* noalias %out, i32* noali
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP18]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    [[TMP19:%.*]] = load float, float* [[B]], align 4
+; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    [[TMP19:%.*]] = load float, ptr [[B]], align 4
 ; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP19]], 5.000000e-01
-; FVW2-NEXT:    [[B6:%.*]] = getelementptr inbounds [[STRUCT_OUT]], %struct.Out* [[OUT]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    store float [[ADD]], float* [[B6]], align 4
+; FVW2-NEXT:    [[B6:%.*]] = getelementptr inbounds [[STRUCT_OUT]], ptr [[OUT]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    store float [[ADD]], ptr [[B6]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -424,17 +418,17 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %b = getelementptr inbounds %struct.In, %struct.In* %in, i64 %indvars.iv, i32 1
-  %1 = load float, float* %b, align 4
+  %b = getelementptr inbounds %struct.In, ptr %in, i64 %indvars.iv, i32 1
+  %1 = load float, ptr %b, align 4
   %add = fadd float %1, 5.000000e-01
-  %b6 = getelementptr inbounds %struct.Out, %struct.Out* %out, i64 %indvars.iv, i32 1
-  store float %add, float* %b6, align 4
+  %b6 = getelementptr inbounds %struct.Out, ptr %out, i64 %indvars.iv, i32 1
+  store float %add, ptr %b6, align 4
   br label %for.inc
 
 for.inc:
@@ -445,25 +439,25 @@ for.inc:
 for.end:
   ret void
 }
-declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
 
 ; The same as @foo2 but scatter/gather argument is a vecotr of ptrs with addresspace 1
 
-define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
+define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out, ptr noalias %trigger, ptr noalias %index) #0 {
 ; AVX512-LABEL: @foo2_addrspace(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
+; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], [[STRUCT_IN]] addrspace(1)* [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1f32(<16 x float addrspace(1)*> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
 ; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1f32(<16 x float> [[TMP3]], <16 x float addrspace(1)*> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
 ; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -473,16 +467,16 @@ define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspac
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP6]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], [[STRUCT_IN]] addrspace(1)* [[IN]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    [[TMP7:%.*]] = load float, float addrspace(1)* [[B]], align 4
+; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr addrspace(1) [[IN]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    [[TMP7:%.*]] = load float, ptr addrspace(1) [[B]], align 4
 ; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP7]], 5.000000e-01
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    store float [[ADD]], float addrspace(1)* [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    store float [[ADD]], ptr addrspace(1) [[ARRAYIDX5]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -500,30 +494,30 @@ define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspac
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; FVW2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
-; FVW2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; FVW2-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
 ; FVW2-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], [[STRUCT_IN]] addrspace(1)* [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1f32(<2 x float addrspace(1)*> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
 ; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
 ; FVW2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; FVW2-NEXT:    store float [[TMP13]], float addrspace(1)* [[TMP12]], align 4
+; FVW2-NEXT:    store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; FVW2:       pred.store.continue:
 ; FVW2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
 ; FVW2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.if2:
-; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; FVW2-NEXT:    store float [[TMP16]], float addrspace(1)* [[TMP15]], align 4
+; FVW2-NEXT:    store float [[TMP16]], ptr addrspace(1) [[TMP15]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
@@ -535,16 +529,16 @@ define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspac
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP18]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], [[STRUCT_IN]] addrspace(1)* [[IN]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    [[TMP19:%.*]] = load float, float addrspace(1)* [[B]], align 4
+; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr addrspace(1) [[IN]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    [[TMP19:%.*]] = load float, ptr addrspace(1) [[B]], align 4
 ; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP19]], 5.000000e-01
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    store float [[ADD]], float addrspace(1)* [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    store float [[ADD]], ptr addrspace(1) [[ARRAYIDX5]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -558,17 +552,17 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, i64 %indvars.iv, i32 1
-  %1 = load float, float addrspace(1)* %b, align 4
+  %b = getelementptr inbounds %struct.In, ptr addrspace(1) %in, i64 %indvars.iv, i32 1
+  %1 = load float, ptr addrspace(1) %b, align 4
   %add = fadd float %1, 5.000000e-01
-  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %out, i64 %indvars.iv
-  store float %add, float addrspace(1)* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr addrspace(1) %out, i64 %indvars.iv
+  store float %add, ptr addrspace(1) %arrayidx5, align 4
   br label %for.inc
 
 for.inc:
@@ -582,21 +576,21 @@ for.end:
 
 ; Same as foo2_addrspace but here only the input has the non-default address space.
 
-define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspace(0)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noalias %out, ptr noalias %trigger, ptr noalias %index) {
 ; AVX512-LABEL: @foo2_addrspace2(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
+; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], [[STRUCT_IN]] addrspace(1)* [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1f32(<16 x float addrspace(1)*> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
 ; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[OUT:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP3]], <16 x float*> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
 ; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -606,16 +600,16 @@ define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspa
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP6]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], [[STRUCT_IN]] addrspace(1)* [[IN]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    [[TMP7:%.*]] = load float, float addrspace(1)* [[B]], align 4
+; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr addrspace(1) [[IN]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    [[TMP7:%.*]] = load float, ptr addrspace(1) [[B]], align 4
 ; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP7]], 5.000000e-01
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    store float [[ADD]], float* [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    store float [[ADD]], ptr [[ARRAYIDX5]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -633,30 +627,30 @@ define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspa
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; FVW2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
-; FVW2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; FVW2-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
 ; FVW2-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], [[STRUCT_IN]] addrspace(1)* [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1f32(<2 x float addrspace(1)*> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
 ; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
 ; FVW2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; FVW2-NEXT:    store float [[TMP13]], float* [[TMP12]], align 4
+; FVW2-NEXT:    store float [[TMP13]], ptr [[TMP12]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; FVW2:       pred.store.continue:
 ; FVW2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
 ; FVW2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.if2:
-; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; FVW2-NEXT:    store float [[TMP16]], float* [[TMP15]], align 4
+; FVW2-NEXT:    store float [[TMP16]], ptr [[TMP15]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
@@ -668,16 +662,16 @@ define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspa
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP18]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], [[STRUCT_IN]] addrspace(1)* [[IN]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    [[TMP19:%.*]] = load float, float addrspace(1)* [[B]], align 4
+; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr addrspace(1) [[IN]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    [[TMP19:%.*]] = load float, ptr addrspace(1) [[B]], align 4
 ; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP19]], 5.000000e-01
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[OUT]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    store float [[ADD]], float* [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    store float [[ADD]], ptr [[ARRAYIDX5]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -691,17 +685,17 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, i64 %indvars.iv, i32 1
-  %1 = load float, float addrspace(1)* %b, align 4
+  %b = getelementptr inbounds %struct.In, ptr addrspace(1) %in, i64 %indvars.iv, i32 1
+  %1 = load float, ptr addrspace(1) %b, align 4
   %add = fadd float %1, 5.000000e-01
-  %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  store float %add, float* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %out, i64 %indvars.iv
+  store float %add, ptr %arrayidx5, align 4
   br label %for.inc
 
 for.inc:
@@ -715,21 +709,21 @@ for.end:
 
 ; Same as foo2_addrspace but here only the output has the non-default address space.
 
-define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noalias %out, ptr noalias %trigger, ptr noalias %index) {
 ; AVX512-LABEL: @foo2_addrspace3(
 ; AVX512-NEXT:  iter.check:
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
 ; AVX512-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
+; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
-; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
-; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
+; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
+; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
 ; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
-; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT:%.*]], <16 x i64> [[VEC_IND]]
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1f32(<16 x float> [[TMP3]], <16 x float addrspace(1)*> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
+; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
 ; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -739,16 +733,16 @@ define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspa
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
 ; AVX512-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; AVX512-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP6]], 0
 ; AVX512-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; AVX512:       if.then:
-; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; AVX512-NEXT:    [[TMP7:%.*]] = load float, float* [[B]], align 4
+; AVX512-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; AVX512-NEXT:    [[TMP7:%.*]] = load float, ptr [[B]], align 4
 ; AVX512-NEXT:    [[ADD:%.*]] = fadd float [[TMP7]], 5.000000e-01
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[INDVARS_IV]]
-; AVX512-NEXT:    store float [[ADD]], float addrspace(1)* [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[INDVARS_IV]]
+; AVX512-NEXT:    store float [[ADD]], ptr addrspace(1) [[ARRAYIDX5]], align 4
 ; AVX512-NEXT:    br label [[FOR_INC]]
 ; AVX512:       for.inc:
 ; AVX512-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -766,30 +760,30 @@ define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspa
 ; FVW2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
 ; FVW2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; FVW2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
-; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[TMP0]]
-; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; FVW2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
-; FVW2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
+; FVW2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; FVW2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; FVW2-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
 ; FVW2-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
-; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], %struct.In* [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
-; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
+; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
+; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
 ; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
-; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT:%.*]], i64 [[TMP0]]
+; FVW2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
 ; FVW2-NEXT:    [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
-; FVW2-NEXT:    store float [[TMP13]], float addrspace(1)* [[TMP12]], align 4
+; FVW2-NEXT:    store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; FVW2:       pred.store.continue:
 ; FVW2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
 ; FVW2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.if2:
-; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[TMP1]]
+; FVW2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[TMP1]]
 ; FVW2-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
-; FVW2-NEXT:    store float [[TMP16]], float addrspace(1)* [[TMP15]], align 4
+; FVW2-NEXT:    store float [[TMP16]], ptr addrspace(1) [[TMP15]], align 4
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
@@ -801,16 +795,16 @@ define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspa
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
 ; FVW2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 4096, [[MIDDLE_BLOCK]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; FVW2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP18]], 0
 ; FVW2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; FVW2:       if.then:
-; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], %struct.In* [[IN]], i64 [[INDVARS_IV]], i32 1
-; FVW2-NEXT:    [[TMP19:%.*]] = load float, float* [[B]], align 4
+; FVW2-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_IN]], ptr [[IN]], i64 [[INDVARS_IV]], i32 1
+; FVW2-NEXT:    [[TMP19:%.*]] = load float, ptr [[B]], align 4
 ; FVW2-NEXT:    [[ADD:%.*]] = fadd float [[TMP19]], 5.000000e-01
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float addrspace(1)* [[OUT]], i64 [[INDVARS_IV]]
-; FVW2-NEXT:    store float [[ADD]], float addrspace(1)* [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT]], i64 [[INDVARS_IV]]
+; FVW2-NEXT:    store float [[ADD]], ptr addrspace(1) [[ARRAYIDX5]], align 4
 ; FVW2-NEXT:    br label [[FOR_INC]]
 ; FVW2:       for.inc:
 ; FVW2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
@@ -824,17 +818,17 @@ entry:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
-  %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %trigger, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:
-  %b = getelementptr inbounds %struct.In, %struct.In* %in, i64 %indvars.iv, i32 1
-  %1 = load float, float* %b, align 4
+  %b = getelementptr inbounds %struct.In, ptr %in, i64 %indvars.iv, i32 1
+  %1 = load float, ptr %b, align 4
   %add = fadd float %1, 5.000000e-01
-  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %out, i64 %indvars.iv
-  store float %add, float addrspace(1)* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr addrspace(1) %out, i64 %indvars.iv
+  store float %add, ptr addrspace(1) %arrayidx5, align 4
   br label %for.inc
 
 for.inc:
@@ -847,13 +841,11 @@ for.end:
 }
 
 ; Using gathers is not profitable for this function. PR48429.
-define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly noalias %ptr, float* nocapture noalias %dest) {
+define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %ptr, ptr nocapture noalias %dest) {
 ; AVX512-LABEL: @test_gather_not_profitable_pr48429(
 ; AVX512-NEXT:  entry:
-; AVX512-NEXT:    [[DEST1:%.*]] = bitcast float* [[DEST:%.*]] to i8*
-; AVX512-NEXT:    [[PTR3:%.*]] = bitcast float* [[PTR:%.*]] to i8*
 ; AVX512-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64
-; AVX512-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 [[IDX_EXT]]
+; AVX512-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
 ; AVX512-NEXT:    [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0
 ; AVX512-NEXT:    br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[ITER_CHECK:%.*]]
 ; AVX512:       iter.check:
@@ -869,126 +861,122 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly noalias
 ; AVX512-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2
 ; AVX512-NEXT:    [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4
 ; AVX512-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 2
-; AVX512-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 4
-; AVX512-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 2
-; AVX512-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP8]]
-; AVX512-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
-; AVX512-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1
-; AVX512-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]]
-; AVX512-NEXT:    [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
-; AVX512-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]]
-; AVX512-NEXT:    [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
-; AVX512-NEXT:    [[TMP10:%.*]] = add i64 [[TMP6]], 1
-; AVX512-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]]
-; AVX512-NEXT:    [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]]
-; AVX512-NEXT:    [[SCEVGEP89:%.*]] = bitcast float* [[SCEVGEP8]] to i8*
-; AVX512-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[DEST1]], [[SCEVGEP45]]
-; AVX512-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[PTR3]], [[SCEVGEP2]]
+; AVX512-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 6
+; AVX512-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8
+; AVX512-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]]
+; AVX512-NEXT:    [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2
+; AVX512-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 4
+; AVX512-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]]
+; AVX512-NEXT:    [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4
+; AVX512-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]]
+; AVX512-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]]
+; AVX512-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]]
+; AVX512-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[UGLYGEP1]]
+; AVX512-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[UGLYGEP]]
 ; AVX512-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; AVX512-NEXT:    [[BOUND010:%.*]] = icmp ult i8* [[DEST1]], [[SCEVGEP89]]
-; AVX512-NEXT:    [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
-; AVX512-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
-; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
+; AVX512-NEXT:    [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[UGLYGEP3]]
+; AVX512-NEXT:    [[BOUND15:%.*]] = icmp ult ptr [[UGLYGEP2]], [[UGLYGEP]]
+; AVX512-NEXT:    [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
+; AVX512-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
 ; AVX512-NEXT:    br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; AVX512:       vector.main.loop.iter.check:
-; AVX512-NEXT:    [[MIN_ITERS_CHECK13:%.*]] = icmp ult i64 [[TMP3]], 16
-; AVX512-NEXT:    br i1 [[MIN_ITERS_CHECK13]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; AVX512-NEXT:    [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[TMP3]], 16
+; AVX512-NEXT:    br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; AVX512:       vector.ph:
 ; AVX512-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
 ; AVX512-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
-; AVX512-NEXT:    [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]]
-; AVX512-NEXT:    [[TMP12:%.*]] = mul i64 [[N_VEC]], 16
-; AVX512-NEXT:    [[IND_END15:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]]
+; AVX512-NEXT:    [[TMP13:%.*]] = mul i64 [[N_VEC]], 4
+; AVX512-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP13]]
+; AVX512-NEXT:    [[TMP14:%.*]] = mul i64 [[N_VEC]], 64
+; AVX512-NEXT:    [[IND_END9:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP14]]
 ; AVX512-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; AVX512:       vector.body:
-; AVX512-NEXT:    [[POINTER_PHI:%.*]] = phi float* [ [[DEST]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; AVX512-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[DEST]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 0
-; AVX512-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP13]]
-; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr float, float* [[POINTER_PHI]], <16 x i64> <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>
-; AVX512-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]]
-; AVX512-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP15]], i32 0
-; AVX512-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP16]] to <16 x float>*
-; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, <16 x float>* [[TMP17]], align 4, !alias.scope !14
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD]], <16 x float*> [[TMP14]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !17, !noalias !19
-; AVX512-NEXT:    [[TMP18:%.*]] = getelementptr float, float* [[NEXT_GEP]], i32 0
-; AVX512-NEXT:    [[TMP19:%.*]] = bitcast float* [[TMP18]] to <16 x float>*
-; AVX512-NEXT:    [[WIDE_LOAD14:%.*]] = load <16 x float>, <16 x float>* [[TMP19]], align 4, !alias.scope !21
-; AVX512-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, <16 x float*> [[TMP14]], i64 1
-; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[WIDE_LOAD14]], <16 x float*> [[TMP20]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !17, !noalias !19
+; AVX512-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 0
+; AVX512-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; AVX512-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP16]]
+; AVX512-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448, i64 512, i64 576, i64 640, i64 704, i64 768, i64 832, i64 896, i64 960>
+; AVX512-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
+; AVX512-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0
+; AVX512-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP19]], align 4, !alias.scope !14
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> [[TMP17]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !17, !noalias !19
+; AVX512-NEXT:    [[TMP20:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0
+; AVX512-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x float>, ptr [[TMP20]], align 4, !alias.scope !21
+; AVX512-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP17]], i64 1
+; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD8]], <16 x ptr> [[TMP21]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !17, !noalias !19
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; AVX512-NEXT:    [[PTR_IND]] = getelementptr float, float* [[POINTER_PHI]], i64 256
-; AVX512-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX512-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; AVX512-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024
+; AVX512-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; AVX512-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; AVX512:       middle.block:
 ; AVX512-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; AVX512-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; AVX512:       vec.epilog.iter.check:
-; AVX512-NEXT:    [[TMP22:%.*]] = mul i64 [[N_VEC]], 16
-; AVX512-NEXT:    [[IND_END23:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP22]]
-; AVX512-NEXT:    [[IND_END20:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]]
+; AVX512-NEXT:    [[TMP23:%.*]] = mul i64 [[N_VEC]], 64
+; AVX512-NEXT:    [[IND_END17:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP23]]
+; AVX512-NEXT:    [[TMP24:%.*]] = mul i64 [[N_VEC]], 4
+; AVX512-NEXT:    [[IND_END14:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP24]]
 ; AVX512-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
 ; AVX512-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
 ; AVX512-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; AVX512:       vec.epilog.ph:
-; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float* [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; AVX512-NEXT:    [[BC_RESUME_VAL16:%.*]] = phi float* [ [[IND_END15]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; AVX512-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; AVX512-NEXT:    [[BC_RESUME_VAL10:%.*]] = phi ptr [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; AVX512-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; AVX512-NEXT:    [[N_MOD_VF17:%.*]] = urem i64 [[TMP3]], 8
-; AVX512-NEXT:    [[N_VEC18:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF17]]
-; AVX512-NEXT:    [[IND_END19:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC18]]
-; AVX512-NEXT:    [[TMP23:%.*]] = mul i64 [[N_VEC18]], 16
-; AVX512-NEXT:    [[IND_END22:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP23]]
+; AVX512-NEXT:    [[N_MOD_VF11:%.*]] = urem i64 [[TMP3]], 8
+; AVX512-NEXT:    [[N_VEC12:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF11]]
+; AVX512-NEXT:    [[TMP25:%.*]] = mul i64 [[N_VEC12]], 4
+; AVX512-NEXT:    [[IND_END13:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP25]]
+; AVX512-NEXT:    [[TMP26:%.*]] = mul i64 [[N_VEC12]], 64
+; AVX512-NEXT:    [[IND_END16:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP26]]
 ; AVX512-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; AVX512:       vec.epilog.vector.body:
-; AVX512-NEXT:    [[POINTER_PHI28:%.*]] = phi float* [ [[BC_RESUME_VAL16]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; AVX512-NEXT:    [[INDEX26:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; AVX512-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX26]], 0
-; AVX512-NEXT:    [[NEXT_GEP27:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP24]]
-; AVX512-NEXT:    [[TMP25:%.*]] = getelementptr float, float* [[POINTER_PHI28]], <8 x i64> <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112>
-; AVX512-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP27]], i64 [[IDXPROM]]
-; AVX512-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, float* [[TMP26]], i32 0
-; AVX512-NEXT:    [[TMP28:%.*]] = bitcast float* [[TMP27]] to <8 x float>*
-; AVX512-NEXT:    [[WIDE_LOAD30:%.*]] = load <8 x float>, <8 x float>* [[TMP28]], align 4, !alias.scope !23
-; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> [[WIDE_LOAD30]], <8 x float*> [[TMP25]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !26, !noalias !28
-; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr float, float* [[NEXT_GEP27]], i32 0
-; AVX512-NEXT:    [[TMP30:%.*]] = bitcast float* [[TMP29]] to <8 x float>*
-; AVX512-NEXT:    [[WIDE_LOAD31:%.*]] = load <8 x float>, <8 x float>* [[TMP30]], align 4, !alias.scope !30
-; AVX512-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, <8 x float*> [[TMP25]], i64 1
-; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> [[WIDE_LOAD31]], <8 x float*> [[TMP31]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !26, !noalias !28
-; AVX512-NEXT:    [[INDEX_NEXT32]] = add nuw i64 [[INDEX26]], 8
-; AVX512-NEXT:    [[PTR_IND29]] = getelementptr float, float* [[POINTER_PHI28]], i64 128
-; AVX512-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT32]], [[N_VEC18]]
-; AVX512-NEXT:    br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; AVX512-NEXT:    [[POINTER_PHI22:%.*]] = phi ptr [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; AVX512-NEXT:    [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; AVX512-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX20]], 0
+; AVX512-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 4
+; AVX512-NEXT:    [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP28]]
+; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI22]], <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448>
+; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP21]], i64 [[IDXPROM]]
+; AVX512-NEXT:    [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP30]], i32 0
+; AVX512-NEXT:    [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope !23
+; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP29]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !26, !noalias !28
+; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr float, ptr [[NEXT_GEP21]], i32 0
+; AVX512-NEXT:    [[WIDE_LOAD25:%.*]] = load <8 x float>, ptr [[TMP32]], align 4, !alias.scope !30
+; AVX512-NEXT:    [[TMP33:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP29]], i64 1
+; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD25]], <8 x ptr> [[TMP33]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope !26, !noalias !28
+; AVX512-NEXT:    [[INDEX_NEXT26]] = add nuw i64 [[INDEX20]], 8
+; AVX512-NEXT:    [[PTR_IND23]] = getelementptr i8, ptr [[POINTER_PHI22]], i64 512
+; AVX512-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT26]], [[N_VEC12]]
+; AVX512-NEXT:    br i1 [[TMP34]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
 ; AVX512:       vec.epilog.middle.block:
-; AVX512-NEXT:    [[CMP_N25:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC18]]
-; AVX512-NEXT:    br i1 [[CMP_N25]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; AVX512-NEXT:    [[CMP_N19:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC12]]
+; AVX512-NEXT:    br i1 [[CMP_N19]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
 ; AVX512:       vec.epilog.scalar.ph:
-; AVX512-NEXT:    [[BC_RESUME_VAL21:%.*]] = phi float* [ [[IND_END19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END20]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[ITER_CHECK]] ]
-; AVX512-NEXT:    [[BC_RESUME_VAL24:%.*]] = phi float* [ [[IND_END22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[ITER_CHECK]] ]
+; AVX512-NEXT:    [[BC_RESUME_VAL15:%.*]] = phi ptr [ [[IND_END13]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END14]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ], [ [[PTR]], [[ITER_CHECK]] ]
+; AVX512-NEXT:    [[BC_RESUME_VAL18:%.*]] = phi ptr [ [[IND_END16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END17]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ], [ [[DEST]], [[ITER_CHECK]] ]
 ; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
 ; AVX512:       for.body:
-; AVX512-NEXT:    [[PTR_ADDR_012:%.*]] = phi float* [ [[BC_RESUME_VAL21]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; AVX512-NEXT:    [[DEST_ADDR_011:%.*]] = phi float* [ [[BC_RESUME_VAL24]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ]
-; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]]
-; AVX512-NEXT:    [[TMP33:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; AVX512-NEXT:    store float [[TMP33]], float* [[DEST_ADDR_011]], align 4
-; AVX512-NEXT:    [[TMP34:%.*]] = load float, float* [[PTR_ADDR_012]], align 4
-; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1
-; AVX512-NEXT:    store float [[TMP34]], float* [[ARRAYIDX5]], align 4
-; AVX512-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1
-; AVX512-NEXT:    [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16
-; AVX512-NEXT:    [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]]
+; AVX512-NEXT:    [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
+; AVX512-NEXT:    [[DEST_ADDR_011:%.*]] = phi ptr [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ]
+; AVX512-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 [[IDXPROM]]
+; AVX512-NEXT:    [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; AVX512-NEXT:    store float [[TMP35]], ptr [[DEST_ADDR_011]], align 4
+; AVX512-NEXT:    [[TMP36:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4
+; AVX512-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 1
+; AVX512-NEXT:    store float [[TMP36]], ptr [[ARRAYIDX5]], align 4
+; AVX512-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1
+; AVX512-NEXT:    [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16
+; AVX512-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]]
 ; AVX512-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; AVX512:       for.end:
 ; AVX512-NEXT:    ret void
 ;
 ; FVW2-LABEL: @test_gather_not_profitable_pr48429(
 ; FVW2-NEXT:  entry:
-; FVW2-NEXT:    [[DEST1:%.*]] = bitcast float* [[DEST:%.*]] to i8*
-; FVW2-NEXT:    [[PTR3:%.*]] = bitcast float* [[PTR:%.*]] to i8*
 ; FVW2-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64
-; FVW2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 [[IDX_EXT]]
+; FVW2-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]]
 ; FVW2-NEXT:    [[CMP_NOT10:%.*]] = icmp eq i32 [[D]], 0
 ; FVW2-NEXT:    br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]]
 ; FVW2:       for.body.lr.ph:
@@ -1004,90 +992,87 @@ define void @test_gather_not_profitable_pr48429(i32 %d, float* readonly noalias
 ; FVW2-NEXT:    [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2
 ; FVW2-NEXT:    [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4
 ; FVW2-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP5]], 2
-; FVW2-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 4
-; FVW2-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 2
-; FVW2-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP8]]
-; FVW2-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
-; FVW2-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP6]], 1
-; FVW2-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP9]]
-; FVW2-NEXT:    [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
-; FVW2-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[PTR]], i64 [[IDXPROM]]
-; FVW2-NEXT:    [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
-; FVW2-NEXT:    [[TMP10:%.*]] = add i64 [[TMP6]], 1
-; FVW2-NEXT:    [[TMP11:%.*]] = sub i64 [[TMP10]], [[IDX_EXT]]
-; FVW2-NEXT:    [[SCEVGEP8:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP11]]
-; FVW2-NEXT:    [[SCEVGEP89:%.*]] = bitcast float* [[SCEVGEP8]] to i8*
-; FVW2-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[DEST1]], [[SCEVGEP45]]
-; FVW2-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[PTR3]], [[SCEVGEP2]]
+; FVW2-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP6]], 6
+; FVW2-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8
+; FVW2-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]]
+; FVW2-NEXT:    [[TMP9:%.*]] = shl nuw i64 [[TMP6]], 2
+; FVW2-NEXT:    [[TMP10:%.*]] = add i64 [[TMP9]], 4
+; FVW2-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP10]]
+; FVW2-NEXT:    [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4
+; FVW2-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]]
+; FVW2-NEXT:    [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP4]]
+; FVW2-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP12]]
+; FVW2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[UGLYGEP1]]
+; FVW2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[UGLYGEP]]
 ; FVW2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; FVW2-NEXT:    [[BOUND010:%.*]] = icmp ult i8* [[DEST1]], [[SCEVGEP89]]
-; FVW2-NEXT:    [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
-; FVW2-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
-; FVW2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
+; FVW2-NEXT:    [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[UGLYGEP3]]
+; FVW2-NEXT:    [[BOUND15:%.*]] = icmp ult ptr [[UGLYGEP2]], [[UGLYGEP]]
+; FVW2-NEXT:    [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
+; FVW2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
 ; FVW2-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; FVW2:       vector.ph:
 ; FVW2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
 ; FVW2-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
-; FVW2-NEXT:    [[IND_END:%.*]] = getelementptr float, float* [[PTR]], i64 [[N_VEC]]
-; FVW2-NEXT:    [[TMP12:%.*]] = mul i64 [[N_VEC]], 16
-; FVW2-NEXT:    [[IND_END13:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP12]]
+; FVW2-NEXT:    [[TMP13:%.*]] = mul i64 [[N_VEC]], 4
+; FVW2-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP13]]
+; FVW2-NEXT:    [[TMP14:%.*]] = mul i64 [[N_VEC]], 64
+; FVW2-NEXT:    [[IND_END7:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP14]]
 ; FVW2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FVW2:       vector.body:
 ; FVW2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; FVW2-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 0
-; FVW2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr float, float* [[PTR]], i64 [[TMP13]]
-; FVW2-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 0
-; FVW2-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 16
-; FVW2-NEXT:    [[NEXT_GEP15:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP15]]
-; FVW2-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 1
-; FVW2-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 16
-; FVW2-NEXT:    [[NEXT_GEP16:%.*]] = getelementptr float, float* [[DEST]], i64 [[TMP17]]
-; FVW2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP]], i64 [[IDXPROM]]
-; FVW2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 0
-; FVW2-NEXT:    [[TMP20:%.*]] = bitcast float* [[TMP19]] to <2 x float>*
-; FVW2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP20]], align 4, !alias.scope !14
-; FVW2-NEXT:    [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
-; FVW2-NEXT:    store float [[TMP21]], float* [[NEXT_GEP15]], align 4, !alias.scope !17, !noalias !19
-; FVW2-NEXT:    [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
-; FVW2-NEXT:    store float [[TMP22]], float* [[NEXT_GEP16]], align 4, !alias.scope !17, !noalias !19
-; FVW2-NEXT:    [[TMP23:%.*]] = getelementptr float, float* [[NEXT_GEP]], i32 0
-; FVW2-NEXT:    [[TMP24:%.*]] = bitcast float* [[TMP23]] to <2 x float>*
-; FVW2-NEXT:    [[WIDE_LOAD17:%.*]] = load <2 x float>, <2 x float>* [[TMP24]], align 4, !alias.scope !21
-; FVW2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP15]], i64 1
-; FVW2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[NEXT_GEP16]], i64 1
-; FVW2-NEXT:    [[TMP27:%.*]] = extractelement <2 x float> [[WIDE_LOAD17]], i32 0
-; FVW2-NEXT:    store float [[TMP27]], float* [[TMP25]], align 4, !alias.scope !17, !noalias !19
-; FVW2-NEXT:    [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD17]], i32 1
-; FVW2-NEXT:    store float [[TMP28]], float* [[TMP26]], align 4, !alias.scope !17, !noalias !19
+; FVW2-NEXT:    [[TMP15:%.*]] = add i64 [[INDEX]], 0
+; FVW2-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; FVW2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP16]]
+; FVW2-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 0
+; FVW2-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 64
+; FVW2-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]]
+; FVW2-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 1
+; FVW2-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP19]], 64
+; FVW2-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP20]]
+; FVW2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
+; FVW2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 0
+; FVW2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP22]], align 4, !alias.scope !14
+; FVW2-NEXT:    [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
+; FVW2-NEXT:    store float [[TMP23]], ptr [[NEXT_GEP9]], align 4, !alias.scope !17, !noalias !19
+; FVW2-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
+; FVW2-NEXT:    store float [[TMP24]], ptr [[NEXT_GEP10]], align 4, !alias.scope !17, !noalias !19
+; FVW2-NEXT:    [[TMP25:%.*]] = getelementptr float, ptr [[NEXT_GEP]], i32 0
+; FVW2-NEXT:    [[WIDE_LOAD11:%.*]] = load <2 x float>, ptr [[TMP25]], align 4, !alias.scope !21
+; FVW2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP9]], i64 1
+; FVW2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP10]], i64 1
+; FVW2-NEXT:    [[TMP28:%.*]] = extractelement <2 x float> [[WIDE_LOAD11]], i32 0
+; FVW2-NEXT:    store float [[TMP28]], ptr [[TMP26]], align 4, !alias.scope !17, !noalias !19
+; FVW2-NEXT:    [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD11]], i32 1
+; FVW2-NEXT:    store float [[TMP29]], ptr [[TMP27]], align 4, !alias.scope !17, !noalias !19
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; FVW2-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; FVW2-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; FVW2-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; FVW2-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; FVW2:       middle.block:
 ; FVW2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; FVW2-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[SCALAR_PH]]
 ; FVW2:       scalar.ph:
-; FVW2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi float* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ]
-; FVW2-NEXT:    [[BC_RESUME_VAL14:%.*]] = phi float* [ [[IND_END13]], [[MIDDLE_BLOCK]] ], [ [[DEST]], [[FOR_BODY_LR_PH]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ]
+; FVW2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[FOR_BODY_LR_PH]] ], [ [[PTR]], [[VECTOR_MEMCHECK]] ]
+; FVW2-NEXT:    [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[DEST]], [[FOR_BODY_LR_PH]] ], [ [[DEST]], [[VECTOR_MEMCHECK]] ]
 ; FVW2-NEXT:    br label [[FOR_BODY:%.*]]
 ; FVW2:       for.body:
-; FVW2-NEXT:    [[PTR_ADDR_012:%.*]] = phi float* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
-; FVW2-NEXT:    [[DEST_ADDR_011:%.*]] = phi float* [ [[BC_RESUME_VAL14]], [[SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ]
-; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 [[IDXPROM]]
-; FVW2-NEXT:    [[TMP30:%.*]] = load float, float* [[ARRAYIDX]], align 4
-; FVW2-NEXT:    store float [[TMP30]], float* [[DEST_ADDR_011]], align 4
-; FVW2-NEXT:    [[TMP31:%.*]] = load float, float* [[PTR_ADDR_012]], align 4
-; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 1
-; FVW2-NEXT:    store float [[TMP31]], float* [[ARRAYIDX5]], align 4
-; FVW2-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR_ADDR_012]], i64 1
-; FVW2-NEXT:    [[ADD_PTR6]] = getelementptr inbounds float, float* [[DEST_ADDR_011]], i64 16
-; FVW2-NEXT:    [[CMP_NOT:%.*]] = icmp eq float* [[INCDEC_PTR]], [[ADD_PTR]]
+; FVW2-NEXT:    [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
+; FVW2-NEXT:    [[DEST_ADDR_011:%.*]] = phi ptr [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ]
+; FVW2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 [[IDXPROM]]
+; FVW2-NEXT:    [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; FVW2-NEXT:    store float [[TMP31]], ptr [[DEST_ADDR_011]], align 4
+; FVW2-NEXT:    [[TMP32:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4
+; FVW2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 1
+; FVW2-NEXT:    store float [[TMP32]], ptr [[ARRAYIDX5]], align 4
+; FVW2-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1
+; FVW2-NEXT:    [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16
+; FVW2-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]]
 ; FVW2-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; FVW2:       for.end:
 ; FVW2-NEXT:    ret void
 ;
 entry:
   %idx.ext = sext i32 %d to i64
-  %add.ptr = getelementptr inbounds float, float* %ptr, i64 %idx.ext
+  %add.ptr = getelementptr inbounds float, ptr %ptr, i64 %idx.ext
   %cmp.not10 = icmp eq i32 %d, 0
   br i1 %cmp.not10, label %for.end, label %for.body.lr.ph
 
@@ -1097,17 +1082,17 @@ for.body.lr.ph:
   br label %for.body
 
 for.body:
-  %ptr.addr.012 = phi float* [ %ptr, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
-  %dest.addr.011 = phi float* [ %dest, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
-  %arrayidx = getelementptr inbounds float, float* %ptr.addr.012, i64 %idxprom
-  %0 = load float, float* %arrayidx, align 4
-  store float %0, float* %dest.addr.011, align 4
-  %1 = load float, float* %ptr.addr.012, align 4
-  %arrayidx5 = getelementptr inbounds float, float* %dest.addr.011, i64 1
-  store float %1, float* %arrayidx5, align 4
-  %incdec.ptr = getelementptr inbounds float, float* %ptr.addr.012, i64 1
-  %add.ptr6 = getelementptr inbounds float, float* %dest.addr.011, i64 16
-  %cmp.not = icmp eq float* %incdec.ptr, %add.ptr
+  %ptr.addr.012 = phi ptr [ %ptr, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
+  %dest.addr.011 = phi ptr [ %dest, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %ptr.addr.012, i64 %idxprom
+  %0 = load float, ptr %arrayidx, align 4
+  store float %0, ptr %dest.addr.011, align 4
+  %1 = load float, ptr %ptr.addr.012, align 4
+  %arrayidx5 = getelementptr inbounds float, ptr %dest.addr.011, i64 1
+  store float %1, ptr %arrayidx5, align 4
+  %incdec.ptr = getelementptr inbounds float, ptr %ptr.addr.012, i64 1
+  %add.ptr6 = getelementptr inbounds float, ptr %dest.addr.011, i64 16
+  %cmp.not = icmp eq ptr %incdec.ptr, %add.ptr
   br i1 %cmp.not, label %for.end, label %for.body
 
 for.end:

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
index 833ff1c8859c4..bddde5d47fffb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-COST: Found an estimated cost of 1 for VF 8 For instruction:   %1 = tail call i16 @llvm.uadd.sat.i16(i16 %0, i16 %offset)
 ; CHECK-COST: Found an estimated cost of 4 for VF 16 For instruction:   %1 = tail call i16 @llvm.uadd.sat.i16(i16 %0, i16 %offset)
 
-define void @uaddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* nocapture noalias %pDst, i32 %blockSize) #0 {
+define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @uaddsat(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -40,87 +40,85 @@ define void @uaddsat(i16* nocapture readonly %pSrc, i16 signext %offset, i16* no
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[PSRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i16, i16* [[PDST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP]] to <16 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i16>, <16 x i16>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 16
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <16 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i16>, <16 x i16>* [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 32
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <16 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <16 x i16>, <16 x i16>* [[TMP7]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i16, i16* [[NEXT_GEP]], i64 48
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <16 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i16>, <16 x i16>* [[TMP9]], align 2
-; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD]], <16 x i16> [[BROADCAST_SPLAT]])
-; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD9]], <16 x i16> [[BROADCAST_SPLAT13]])
-; CHECK-NEXT:    [[TMP12:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD10]], <16 x i16> [[BROADCAST_SPLAT15]])
-; CHECK-NEXT:    [[TMP13:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD11]], <16 x i16> [[BROADCAST_SPLAT17]])
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i16* [[NEXT_GEP5]] to <16 x i16>*
-; CHECK-NEXT:    store <16 x i16> [[TMP10]], <16 x i16>* [[TMP14]], align 2
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 16
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i16* [[TMP15]] to <16 x i16>*
-; CHECK-NEXT:    store <16 x i16> [[TMP11]], <16 x i16>* [[TMP16]], align 2
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 32
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <16 x i16>*
-; CHECK-NEXT:    store <16 x i16> [[TMP12]], <16 x i16>* [[TMP18]], align 2
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i16, i16* [[NEXT_GEP5]], i64 48
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i16* [[TMP19]] to <16 x i16>*
-; CHECK-NEXT:    store <16 x i16> [[TMP13]], <16 x i16>* [[TMP20]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16
+; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32
+; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <16 x i16>, ptr [[TMP6]], align 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i16>, ptr [[TMP7]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD]], <16 x i16> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD9]], <16 x i16> [[BROADCAST_SPLAT13]])
+; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD10]], <16 x i16> [[BROADCAST_SPLAT15]])
+; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD11]], <16 x i16> [[BROADCAST_SPLAT17]])
+; CHECK-NEXT:    store <16 x i16> [[TMP8]], ptr [[NEXT_GEP5]], align 2
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 16
+; CHECK-NEXT:    store <16 x i16> [[TMP9]], ptr [[TMP12]], align 2
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 32
+; CHECK-NEXT:    store <16 x i16> [[TMP10]], ptr [[TMP13]], align 2
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i16, ptr [[NEXT_GEP5]], i64 48
+; CHECK-NEXT:    store <16 x i16> [[TMP11]], ptr [[TMP14]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[IND_END26:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END23:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[CAST_VTC20:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT:    [[IND_END21:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC20]]
+; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END30:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = shl nuw nsw i64 [[N_VEC]], 1
+; CHECK-NEXT:    [[IND_END27:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP17]]
+; CHECK-NEXT:    [[DOTCAST23:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    [[IND_END24:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST23]]
 ; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
 ; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[N_VEC19:%.*]] = and i64 [[TMP2]], -8
-; CHECK-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC19]] to i32
-; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]]
-; CHECK-NEXT:    [[IND_END22:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[N_VEC19]]
-; CHECK-NEXT:    [[IND_END25:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[N_VEC19]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT34:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT33]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[N_VEC21:%.*]] = and i64 [[TMP2]], -8
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC21]] to i32
+; CHECK-NEXT:    [[IND_END22:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
+; CHECK-NEXT:    [[TMP18:%.*]] = shl nuw nsw i64 [[N_VEC21]], 1
+; CHECK-NEXT:    [[IND_END26:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = shl nuw nsw i64 [[N_VEC21]], 1
+; CHECK-NEXT:    [[IND_END29:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP19]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT37:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT38:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT37]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP30:%.*]] = getelementptr i16, i16* [[PSRC]], i64 [[INDEX29]]
-; CHECK-NEXT:    [[NEXT_GEP31:%.*]] = getelementptr i16, i16* [[PDST]], i64 [[INDEX29]]
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i16* [[NEXT_GEP30]] to <8 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD32:%.*]] = load <8 x i16>, <8 x i16>* [[TMP22]], align 2
-; CHECK-NEXT:    [[TMP23:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[WIDE_LOAD32]], <8 x i16> [[BROADCAST_SPLAT34]])
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i16* [[NEXT_GEP31]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP23]], <8 x i16>* [[TMP24]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT35]] = add nuw i64 [[INDEX29]], 8
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC19]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    [[INDEX33:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[INDEX33]], 1
+; CHECK-NEXT:    [[NEXT_GEP34:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i64 [[INDEX33]], 1
+; CHECK-NEXT:    [[NEXT_GEP35:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP21]]
+; CHECK-NEXT:    [[WIDE_LOAD36:%.*]] = load <8 x i16>, ptr [[NEXT_GEP34]], align 2
+; CHECK-NEXT:    [[TMP22:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[WIDE_LOAD36]], <8 x i16> [[BROADCAST_SPLAT38]])
+; CHECK-NEXT:    store <8 x i16> [[TMP22]], ptr [[NEXT_GEP35]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT39]] = add nuw i64 [[INDEX33]], 8
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC21]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N28:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC19]]
-; CHECK-NEXT:    br i1 [[CMP_N28]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N32:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC21]]
+; CHECK-NEXT:    br i1 [[CMP_N32]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END21]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL24:%.*]] = phi i16* [ [[IND_END22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL27:%.*]] = phi i16* [ [[IND_END25]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END26]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL25:%.*]] = phi i32 [ [[IND_END22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END24]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL28:%.*]] = phi ptr [ [[IND_END26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL31:%.*]] = phi ptr [ [[IND_END29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END30]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi i16* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL24]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi i16* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL27]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRC_ADDR_08]], i64 1
-; CHECK-NEXT:    [[TMP26:%.*]] = load i16, i16* [[PSRC_ADDR_08]], align 2
-; CHECK-NEXT:    [[TMP27:%.*]] = tail call i16 @llvm.uadd.sat.i16(i16 [[TMP26]], i16 [[OFFSET]])
-; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i16, i16* [[PDST_ADDR_07]], i64 1
-; CHECK-NEXT:    store i16 [[TMP27]], i16* [[PDST_ADDR_07]], align 2
+; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL25]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL28]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL31]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i16, ptr [[PSRC_ADDR_08]], i64 1
+; CHECK-NEXT:    [[TMP24:%.*]] = load i16, ptr [[PSRC_ADDR_08]], align 2
+; CHECK-NEXT:    [[TMP25:%.*]] = tail call i16 @llvm.uadd.sat.i16(i16 [[TMP24]], i16 [[OFFSET]])
+; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i16, ptr [[PDST_ADDR_07]], i64 1
+; CHECK-NEXT:    store i16 [[TMP25]], ptr [[PDST_ADDR_07]], align 2
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_09]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -133,13 +131,13 @@ entry:
 
 while.body:                                       ; preds = %entry, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pSrc.addr.08 = phi i16* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
-  %pDst.addr.07 = phi i16* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
-  %incdec.ptr = getelementptr inbounds i16, i16* %pSrc.addr.08, i32 1
-  %0 = load i16, i16* %pSrc.addr.08, align 2
+  %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
+  %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, ptr %pSrc.addr.08, i32 1
+  %0 = load i16, ptr %pSrc.addr.08, align 2
   %1 = tail call i16 @llvm.uadd.sat.i16(i16 %0, i16 %offset)
-  %incdec.ptr3 = getelementptr inbounds i16, i16* %pDst.addr.07, i32 1
-  store i16 %1, i16* %pDst.addr.07, align 2
+  %incdec.ptr3 = getelementptr inbounds i16, ptr %pDst.addr.07, i32 1
+  store i16 %1, ptr %pDst.addr.07, align 2
   %dec = add i32 %blkCnt.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body
@@ -156,7 +154,7 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK-COST: Found an estimated cost of 1 for VF 16 For instruction:   %1 = tail call i8 @llvm.fshl.i8(i8 %0, i8 %0, i8 %offset)
 ; CHECK-COST: Found an estimated cost of 4 for VF 32 For instruction:   %1 = tail call i8 @llvm.fshl.i8(i8 %0, i8 %0, i8 %offset)
 
-define void @fshl(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocapture noalias %pDst, i32 %blockSize) #0 {
+define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocapture noalias %pDst, i32 %blockSize) #0 {
 ; CHECK-LABEL: @fshl(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP_NOT6:%.*]] = icmp eq i32 [[BLOCKSIZE:%.*]], 0
@@ -183,87 +181,77 @@ define void @fshl(i8* nocapture readonly %pSrc, i8 signext %offset, i8* nocaptur
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PSRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, i8* [[PDST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[NEXT_GEP]] to <32 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, <32 x i8>* [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 32
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <32 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 64
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <32 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <32 x i8>, <32 x i8>* [[TMP7]], align 2
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i64 96
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <32 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <32 x i8>, <32 x i8>* [[TMP9]], align 2
-; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD]], <32 x i8> [[WIDE_LOAD]], <32 x i8> [[BROADCAST_SPLAT]])
-; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD9]], <32 x i8> [[WIDE_LOAD9]], <32 x i8> [[BROADCAST_SPLAT13]])
-; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD10]], <32 x i8> [[WIDE_LOAD10]], <32 x i8> [[BROADCAST_SPLAT15]])
-; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD11]], <32 x i8> [[WIDE_LOAD11]], <32 x i8> [[BROADCAST_SPLAT17]])
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[NEXT_GEP5]] to <32 x i8>*
-; CHECK-NEXT:    store <32 x i8> [[TMP10]], <32 x i8>* [[TMP14]], align 2
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 32
-; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <32 x i8>*
-; CHECK-NEXT:    store <32 x i8> [[TMP11]], <32 x i8>* [[TMP16]], align 2
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 64
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[TMP17]] to <32 x i8>*
-; CHECK-NEXT:    store <32 x i8> [[TMP12]], <32 x i8>* [[TMP18]], align 2
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, i8* [[NEXT_GEP5]], i64 96
-; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <32 x i8>*
-; CHECK-NEXT:    store <32 x i8> [[TMP13]], <32 x i8>* [[TMP20]], align 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32
+; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64
+; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <32 x i8>, ptr [[TMP5]], align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD]], <32 x i8> [[WIDE_LOAD]], <32 x i8> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD9]], <32 x i8> [[WIDE_LOAD9]], <32 x i8> [[BROADCAST_SPLAT13]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD10]], <32 x i8> [[WIDE_LOAD10]], <32 x i8> [[BROADCAST_SPLAT15]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD11]], <32 x i8> [[WIDE_LOAD11]], <32 x i8> [[BROADCAST_SPLAT17]])
+; CHECK-NEXT:    store <32 x i8> [[TMP6]], ptr [[NEXT_GEP5]], align 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 32
+; CHECK-NEXT:    store <32 x i8> [[TMP7]], ptr [[TMP10]], align 2
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 64
+; CHECK-NEXT:    store <32 x i8> [[TMP8]], ptr [[TMP11]], align 2
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 96
+; CHECK-NEXT:    store <32 x i8> [[TMP9]], ptr [[TMP12]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[WHILE_END]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[IND_END26:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END23:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[CAST_VTC20:%.*]] = trunc i64 [[N_VEC]] to i32
-; CHECK-NEXT:    [[IND_END21:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC20]]
+; CHECK-NEXT:    [[IND_END30:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END27:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[DOTCAST23:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    [[IND_END24:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST23]]
 ; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[TMP2]], 112
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
 ; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[N_VEC19:%.*]] = and i64 [[TMP2]], -16
-; CHECK-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC19]] to i32
-; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]]
-; CHECK-NEXT:    [[IND_END22:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[N_VEC19]]
-; CHECK-NEXT:    [[IND_END25:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[N_VEC19]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT33:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT34:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT33]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[N_VEC21:%.*]] = and i64 [[TMP2]], -16
+; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC21]] to i32
+; CHECK-NEXT:    [[IND_END22:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST]]
+; CHECK-NEXT:    [[IND_END26:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC21]]
+; CHECK-NEXT:    [[IND_END29:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC21]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT37:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT38:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT37]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[NEXT_GEP30:%.*]] = getelementptr i8, i8* [[PSRC]], i64 [[INDEX29]]
-; CHECK-NEXT:    [[NEXT_GEP31:%.*]] = getelementptr i8, i8* [[PDST]], i64 [[INDEX29]]
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i8* [[NEXT_GEP30]] to <16 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD32:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 2
-; CHECK-NEXT:    [[TMP23:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[WIDE_LOAD32]], <16 x i8> [[WIDE_LOAD32]], <16 x i8> [[BROADCAST_SPLAT34]])
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8* [[NEXT_GEP31]] to <16 x i8>*
-; CHECK-NEXT:    store <16 x i8> [[TMP23]], <16 x i8>* [[TMP24]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT35]] = add nuw i64 [[INDEX29]], 16
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC19]]
-; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[INDEX33:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT39:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[NEXT_GEP34:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX33]]
+; CHECK-NEXT:    [[NEXT_GEP35:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX33]]
+; CHECK-NEXT:    [[WIDE_LOAD36:%.*]] = load <16 x i8>, ptr [[NEXT_GEP34]], align 2
+; CHECK-NEXT:    [[TMP14:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[WIDE_LOAD36]], <16 x i8> [[WIDE_LOAD36]], <16 x i8> [[BROADCAST_SPLAT38]])
+; CHECK-NEXT:    store <16 x i8> [[TMP14]], ptr [[NEXT_GEP35]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT39]] = add nuw i64 [[INDEX33]], 16
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC21]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N28:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC19]]
-; CHECK-NEXT:    br i1 [[CMP_N28]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N32:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC21]]
+; CHECK-NEXT:    br i1 [[CMP_N32]], label [[WHILE_END]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END21]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL24:%.*]] = phi i8* [ [[IND_END22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL27:%.*]] = phi i8* [ [[IND_END25]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END26]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL25:%.*]] = phi i32 [ [[IND_END22]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END24]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[BLOCKSIZE]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL28:%.*]] = phi ptr [ [[IND_END26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PSRC]], [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL31:%.*]] = phi ptr [ [[IND_END29]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END30]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PDST]], [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
-; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL24]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL27]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PSRC_ADDR_08]], i64 1
-; CHECK-NEXT:    [[TMP26:%.*]] = load i8, i8* [[PSRC_ADDR_08]], align 2
-; CHECK-NEXT:    [[TMP27:%.*]] = tail call i8 @llvm.fshl.i8(i8 [[TMP26]], i8 [[TMP26]], i8 [[OFFSET]])
-; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PDST_ADDR_07]], i64 1
-; CHECK-NEXT:    store i8 [[TMP27]], i8* [[PDST_ADDR_07]], align 2
+; CHECK-NEXT:    [[BLKCNT_09:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL25]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PSRC_ADDR_08:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL28]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[PDST_ADDR_07:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL31]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRC_ADDR_08]], i64 1
+; CHECK-NEXT:    [[TMP16:%.*]] = load i8, ptr [[PSRC_ADDR_08]], align 2
+; CHECK-NEXT:    [[TMP17:%.*]] = tail call i8 @llvm.fshl.i8(i8 [[TMP16]], i8 [[TMP16]], i8 [[OFFSET]])
+; CHECK-NEXT:    [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PDST_ADDR_07]], i64 1
+; CHECK-NEXT:    store i8 [[TMP17]], ptr [[PDST_ADDR_07]], align 2
 ; CHECK-NEXT:    [[DEC]] = add i32 [[BLKCNT_09]], -1
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -276,13 +264,13 @@ entry:
 
 while.body:                                       ; preds = %entry, %while.body
   %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %blockSize, %entry ]
-  %pSrc.addr.08 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
-  %pDst.addr.07 = phi i8* [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %pSrc.addr.08, i32 1
-  %0 = load i8, i8* %pSrc.addr.08, align 2
+  %pSrc.addr.08 = phi ptr [ %incdec.ptr, %while.body ], [ %pSrc, %entry ]
+  %pDst.addr.07 = phi ptr [ %incdec.ptr3, %while.body ], [ %pDst, %entry ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %pSrc.addr.08, i32 1
+  %0 = load i8, ptr %pSrc.addr.08, align 2
   %1 = tail call i8 @llvm.fshl.i8(i8 %0, i8 %0, i8 %offset)
-  %incdec.ptr3 = getelementptr inbounds i8, i8* %pDst.addr.07, i32 1
-  store i8 %1, i8* %pDst.addr.07, align 2
+  %incdec.ptr3 = getelementptr inbounds i8, ptr %pDst.addr.07, i32 1
+  store i8 %1, ptr %pDst.addr.07, align 2
   %dec = add i32 %blkCnt.09, -1
   %cmp.not = icmp eq i32 %dec, 0
   br i1 %cmp.not, label %while.end, label %while.body
@@ -294,3 +282,5 @@ while.end:                                        ; preds = %while.body, %entry
 declare i16 @llvm.uadd.sat.i16(i16, i16)
 declare i8 @llvm.fshl.i8(i8, i8, i8)
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COST: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
index 70e82f4d467a4..18ac690f5944f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -3,19 +3,15 @@
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
 
-define void @foo(i64* %ptr, i32* %ptr.2) {
+define void @foo(ptr %ptr, ptr %ptr.2) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PTR_21:%.*]] = bitcast i32* [[PTR_2:%.*]] to i8*
-; CHECK-NEXT:    [[PTR3:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[PTR_2]], i64 1
-; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i64, i64* [[PTR]], i64 80
-; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast i64* [[SCEVGEP4]] to i8*
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[PTR_21]], [[SCEVGEP45]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[PTR3]], [[SCEVGEP2]]
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR_2:%.*]], i64 4
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 640
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[PTR_2]], [[UGLYGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[UGLYGEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -30,36 +26,35 @@ define void @foo(i64* %ptr, i32* %ptr.2) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[VEC_IND]], <4 x i64>* [[TMP8]], align 8, !alias.scope !3
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT:    store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope !3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 80, 80
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       vector.scevcheck:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[CAN_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[CAN_IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = and i64 [[TMP10]], 4294967295
-; CHECK-NEXT:    [[TMP12:%.*]] = trunc i64 [[TMP10]] to i32
-; CHECK-NEXT:    store i32 [[TMP12]], i32* [[PTR_2]], align 4
-; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[CAN_IV]]
-; CHECK-NEXT:    store i64 [[TMP10]], i64* [[GEP_PTR]], align 8
-; CHECK-NEXT:    [[TMP13]] = add nuw nsw i64 [[TMP11]], 1
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], 80
+; CHECK-NEXT:    [[TMP9:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP9]], 4294967295
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
+; CHECK-NEXT:    store i32 [[TMP11]], ptr [[PTR_2]], align 4
+; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[CAN_IV]]
+; CHECK-NEXT:    store i64 [[TMP9]], ptr [[GEP_PTR]], align 8
+; CHECK-NEXT:    [[TMP12]] = add nuw nsw i64 [[TMP10]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], 80
 ; CHECK-NEXT:    [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1
-; CHECK-NEXT:    br i1 [[TMP14]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
@@ -71,9 +66,9 @@ loop:
   %0 = phi i64 [ 2, %entry ], [ %3, %loop ]
   %1 = and i64 %0, 4294967295
   %2 = trunc i64 %0 to i32
-  store i32 %2, i32* %ptr.2
-  %gep.ptr = getelementptr inbounds i64, i64* %ptr, i64 %can.iv
-  store i64 %0, i64* %gep.ptr
+  store i32 %2, ptr %ptr.2
+  %gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %can.iv
+  store i64 %0, ptr %gep.ptr
   %3 = add nuw nsw i64 %1, 1
   %4 = icmp sgt i32 %2, 80
   %can.iv.next = add nuw nsw i64 %can.iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll b/llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
index 863cd8a4a008d..52ab41a3d6e51 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
@@ -4,18 +4,18 @@
 ; CHECK-LABEL: @no_propagate_range_metadata(
 ; CHECK: load <16 x i8>
 ; CHECK: store <16 x i8>
-define void @no_propagate_range_metadata(i8* readonly %first.coerce, i8* readnone %last.coerce, i8* nocapture %result) {
+define void @no_propagate_range_metadata(ptr readonly %first.coerce, ptr readnone %last.coerce, ptr nocapture %result) {
 for.body.preheader:
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.preheader
-  %result.addr.05 = phi i8* [ %incdec.ptr, %for.body ], [ %result, %for.body.preheader ]
-  %first.sroa.0.04 = phi i8* [ %incdec.ptr.i.i.i, %for.body ], [ %first.coerce, %for.body.preheader ]
-  %0 = load i8, i8* %first.sroa.0.04, align 1, !range !0
-  store i8 %0, i8* %result.addr.05, align 1
-  %incdec.ptr.i.i.i = getelementptr inbounds i8, i8* %first.sroa.0.04, i64 1
-  %incdec.ptr = getelementptr inbounds i8, i8* %result.addr.05, i64 1
-  %lnot.i = icmp eq i8* %incdec.ptr.i.i.i, %last.coerce
+  %result.addr.05 = phi ptr [ %incdec.ptr, %for.body ], [ %result, %for.body.preheader ]
+  %first.sroa.0.04 = phi ptr [ %incdec.ptr.i.i.i, %for.body ], [ %first.coerce, %for.body.preheader ]
+  %0 = load i8, ptr %first.sroa.0.04, align 1, !range !0
+  store i8 %0, ptr %result.addr.05, align 1
+  %incdec.ptr.i.i.i = getelementptr inbounds i8, ptr %first.sroa.0.04, i64 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %result.addr.05, i64 1
+  %lnot.i = icmp eq ptr %incdec.ptr.i.i.i, %last.coerce
   br i1 %lnot.i, label %for.end.loopexit, label %for.body
 
 for.end.loopexit:                                 ; preds = %for.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll b/llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
index 9aa921fb9cf1a..96cc2be27d27c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
@@ -8,9 +8,9 @@ entry:
   br label %while.body
 
 while.body:
-  %p.05 = phi i8* [ %add.ptr, %while.body ], [ null, %entry ]
+  %p.05 = phi ptr [ %add.ptr, %while.body ], [ null, %entry ]
   %p1.addr.04 = phi i128 [ %sub, %while.body ], [ %p1, %entry ]
-  %add.ptr = getelementptr inbounds i8, i8* %p.05, i32 2
+  %add.ptr = getelementptr inbounds i8, ptr %p.05, i32 2
   %sub = add nsw i128 %p1.addr.04, -2
   %tobool = icmp eq i128 %sub, 0
   br i1 %tobool, label %while.end, label %while.body

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 9b5ea677d277f..631738b0bd67e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -20,28 +20,28 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 ; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
 ;
 entry:
-  %0 = load i32, i32* @c, align 4
+  %0 = load i32, ptr @c, align 4
   %cmp34 = icmp sgt i32 %0, 8
   br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup
 
 for.body.lr.ph:                                   ; preds = %entry
-  %1 = load i32, i32* @a, align 4
+  %1 = load i32, ptr @a, align 4
   %tobool = icmp eq i32 %1, 0
-  %2 = load i64, i64* @b, align 8
+  %2 = load i64, ptr @b, align 8
   %mul = mul i64 %2, 4063299859190
   %tobool6 = icmp eq i64 %mul, 0
   %3 = sext i32 %0 to i64
@@ -57,25 +57,25 @@ for.body.us:                                      ; preds = %for.body.us.prehead
   %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
   %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
   %4 = sub nsw i64 8, %indvars.iv78
-  %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
+  %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 %indvars.iv78
   %5 = add nsw i64 %4, %indvars.iv70
-  %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
+  %arraydecay.us.us.us = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %5, i64 0
   br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader
 
 for.body5.us.us48.preheader:                      ; preds = %for.body.us
-  store i32 8, i32* %arraydecay.us.us.us, align 16
+  store i32 8, ptr %arraydecay.us.us.us, align 16
   %indvars.iv.next66 = or i64 %indvars.iv70, 1
   %6 = add nsw i64 %4, %indvars.iv.next66
-  %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
-  store i32 8, i32* %arraydecay.us.us55.1, align 8
+  %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %6, i64 0
+  store i32 8, ptr %arraydecay.us.us55.1, align 8
   br label %for.cond.cleanup4.us-lcssa.us.us
 
 for.body5.us.us.us.preheader:                     ; preds = %for.body.us
-  store i32 7, i32* %arraydecay.us.us.us, align 16
+  store i32 7, ptr %arraydecay.us.us.us, align 16
   %indvars.iv.next73 = or i64 %indvars.iv70, 1
   %7 = add nsw i64 %4, %indvars.iv.next73
-  %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
-  store i32 7, i32* %arraydecay.us.us.us.1, align 8
+  %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %7, i64 0
+  store i32 7, ptr %arraydecay.us.us.us.1, align 8
   br label %for.cond.cleanup4.us-lcssa.us.us
 
 for.cond.cleanup4.us-lcssa.us.us:                 ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
@@ -97,14 +97,14 @@ for.body:                                         ; preds = %for.body.preheader,
   %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
   %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
   %8 = sub nsw i64 8, %indvars.iv95
-  %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
+  %add.ptr = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 %indvars.iv95
   %9 = add nsw i64 %8, %indvars.iv87
-  %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
-  store i32 8, i32* %arraydecay.us31, align 16
+  %arraydecay.us31 = getelementptr inbounds [10 x i32], ptr %add.ptr, i64 %9, i64 0
+  store i32 8, ptr %arraydecay.us31, align 16
   %indvars.iv.next90 = or i64 %indvars.iv87, 1
   %10 = add nsw i64 %8, %indvars.iv.next90
-  %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
-  store i32 8, i32* %arraydecay.us31.1, align 8
+  %arraydecay.us31.1 = getelementptr inbounds [10 x i32], ptr %add.ptr, i64 %10, i64 0
+  store i32 8, ptr %arraydecay.us31.1, align 8
   %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
   %cmp = icmp slt i64 %indvars.iv.next96, %3
   %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2

diff  --git a/llvm/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/llvm/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
index 4519649635b32..b2c0bf2beb8d6 100644
--- a/llvm/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
+++ b/llvm/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
@@ -7,14 +7,14 @@ target triple = "xcore"
 ;CHECK: entry:
 ;CHECK-NOT: vector.body
 ;CHECK-NEXT: br label %do.body
-define void @f(i8* nocapture %ptr, i32 %len) {
+define void @f(ptr nocapture %ptr, i32 %len) {
 entry:
   br label %do.body
 do.body:
-  %ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ]
+  %ptr.addr.0 = phi ptr [ %ptr, %entry ], [ %incdec.ptr, %do.body ]
   %len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.0, i32 1
-  store i8 0, i8* %ptr.addr.0, align 1
+  %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.0, i32 1
+  store i8 0, ptr %ptr.addr.0, align 1
   %dec = add nsw i32 %len.addr.0, -1
   %tobool = icmp eq i32 %len.addr.0, 0
   br i1 %tobool, label %do.end, label %do.body

diff  --git a/llvm/test/Transforms/LoopVectorize/ee-crash.ll b/llvm/test/Transforms/LoopVectorize/ee-crash.ll
index 0c1f2e9120250..54c07ed6ff518 100644
--- a/llvm/test/Transforms/LoopVectorize/ee-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/ee-crash.ll
@@ -7,22 +7,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; CHECK-LABEL: @_Z4foo1Pii(
 ; CHECK-NOT: <4 x i32>
 ; CHECK: ret
-define i32 @_Z4foo1Pii(i32* %A, i32 %n, <2 x i32> %q) #0 {
+define i32 @_Z4foo1Pii(ptr %A, i32 %n, <2 x i32> %q) #0 {
 entry:
   %idx.ext = sext i32 %n to i64
-  %add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i32, ptr %A, i64 %idx.ext
   %cmp3.i = icmp eq i32 %n, 0
   br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
 
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
-  %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32, i32* %__first.addr.04.i, align 4
+  %__first.addr.04.i = phi ptr [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
+  %0 = load i32, ptr %__first.addr.04.i, align 4
   %q1 = extractelement <2 x i32> %q, i32 %n
   %q2 = add nsw i32 %0, %q1
   %add.i = add nsw i32 %q2, %__init.addr.05.i
-  %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
-  %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
+  %incdec.ptr.i = getelementptr inbounds i32, ptr %__first.addr.04.i, i64 1
+  %cmp.i = icmp eq ptr %incdec.ptr.i, %add.ptr
   br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i
 
 _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index d9443fff4ffac..a5207e4f5f27d 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -12,13 +12,12 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ;
 ;
 ;
-define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
+define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) {
 ; UNROLL-NO-IC-LABEL: @recurrence_1(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_PREHEADER:%.*]]
 ; UNROLL-NO-IC:       for.preheader:
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; UNROLL-NO-IC-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
+; UNROLL-NO-IC-NEXT:    [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -36,29 +35,25 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP3]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 1
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP13]]
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP14]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP20]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP22]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP10]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP11]]
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP12]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP17]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP16]], ptr [[TMP18]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3
@@ -69,14 +64,14 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-IC:       scalar.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-IC-NEXT:    [[TMP24]] = load i32, i32* [[ARRAYIDX32]], align 4
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[ADD35:%.*]] = add i32 [[TMP24]], [[SCALAR_RECUR]]
-; UNROLL-NO-IC-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-IC-NEXT:    [[TMP20]] = load i32, ptr [[ARRAYIDX32]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    [[ADD35:%.*]] = add i32 [[TMP20]], [[SCALAR_RECUR]]
+; UNROLL-NO-IC-NEXT:    store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
 ; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -87,8 +82,7 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; UNROLL-NO-VF-NEXT:  entry:
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_PREHEADER:%.*]]
 ; UNROLL-NO-VF:       for.preheader:
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; UNROLL-NO-VF-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
+; UNROLL-NO-VF-NEXT:    [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -100,40 +94,40 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, i32* [[TMP6]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add i32 [[TMP7]], [[VECTOR_RECUR]]
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = add i32 [[TMP8]], [[TMP7]]
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP9]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], i32* [[TMP10]], align 4
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP10]] = load i32, ptr [[TMP8]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = add i32 [[TMP9]], [[VECTOR_RECUR]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = add i32 [[TMP10]], [[TMP9]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], ptr [[TMP11]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP14]], ptr [[TMP12]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF:       scalar.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-VF-NEXT:    [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]]
-; UNROLL-NO-VF-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-VF-NEXT:    [[TMP16]] = load i32, ptr [[ARRAYIDX32]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]]
+; UNROLL-NO-VF-NEXT:    store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
 ; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -144,8 +138,7 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; SINK-AFTER-NEXT:  entry:
 ; SINK-AFTER-NEXT:    br label [[FOR_PREHEADER:%.*]]
 ; SINK-AFTER:       for.preheader:
-; SINK-AFTER-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
-; SINK-AFTER-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
+; SINK-AFTER-NEXT:    [[PRE_LOAD:%.*]] = load i32, ptr [[A:%.*]], align 4
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -161,19 +154,17 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]]
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP6]], align 4
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP7]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP10]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
@@ -184,14 +175,14 @@ define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
 ; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER:       scalar.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; SINK-AFTER-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; SINK-AFTER-NEXT:    [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4
-; SINK-AFTER-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]]
-; SINK-AFTER-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; SINK-AFTER-NEXT:    [[TMP12]] = load i32, ptr [[ARRAYIDX32]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP12]], [[SCALAR_RECUR]]
+; SINK-AFTER-NEXT:    store i32 [[ADD35]], ptr [[ARRAYIDX34]], align 4
 ; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
 ; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -202,19 +193,18 @@ entry:
   br label %for.preheader
 
 for.preheader:
-  %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
-  %pre_load = load i32, i32* %arrayidx.phi.trans.insert
+  %pre_load = load i32, ptr %a
   br label %scalar.body
 
 scalar.body:
   %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
   %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
-  %1 = load i32, i32* %arrayidx32
-  %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %arrayidx32 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv.next
+  %1 = load i32, ptr %arrayidx32
+  %arrayidx34 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
   %add35 = add i32 %1, %0
-  store i32 %add35, i32* %arrayidx34
+  store i32 %add35, ptr %arrayidx34
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
   br i1 %exitcond, label %for.exit, label %scalar.body
@@ -232,14 +222,14 @@ for.exit:
 ;
 ;
 ;
-define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
+define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-IC-LABEL: @recurrence_2(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; UNROLL-NO-IC:       for.preheader:
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
-; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
+; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -253,37 +243,35 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP11]]
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP12]]
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = icmp sgt <4 x i32> [[TMP13]], zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP14]], <4 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP17]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP18]]
-; UNROLL-NO-IC-NEXT:    [[TMP21]] = select <4 x i1> [[TMP19]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP17]]
-; UNROLL-NO-IC-NEXT:    [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]]
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP8]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP9]]
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP10]]
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp sgt <4 x i32> [[TMP11]], zeroinitializer
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp sgt <4 x i32> [[TMP12]], zeroinitializer
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> [[TMP11]], <4 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP15]]
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP16]]
+; UNROLL-NO-IC-NEXT:    [[TMP19]] = select <4 x i1> [[TMP17]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP15]]
+; UNROLL-NO-IC-NEXT:    [[TMP20]] = select <4 x i1> [[TMP18]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP16]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
-; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]]
-; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]]
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
+; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP19]], [[TMP20]]
+; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP19]], <4 x i32> [[TMP20]]
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 2
@@ -291,21 +279,21 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-IC:       scalar.ph:
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
-; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-IC:       for.cond.cleanup.loopexit:
-; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; UNROLL-NO-IC:       for.cond.cleanup:
 ; UNROLL-NO-IC-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
 ; UNROLL-NO-IC-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
 ; UNROLL-NO-IC:       scalar.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i32, i32* [[ARRAYIDX]], align 4
-; UNROLL-NO-IC-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP25]], [[SCALAR_RECUR]]
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    [[TMP23]] = load i32, ptr [[ARRAYIDX]], align 4
+; UNROLL-NO-IC-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP23]], [[SCALAR_RECUR]]
 ; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
 ; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
@@ -320,8 +308,8 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-VF-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; UNROLL-NO-VF:       for.preheader:
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
-; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
+; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
 ; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -333,35 +321,35 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION2]]
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP6]] = load i32, i32* [[TMP4]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]]
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]]
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP12]]
-; UNROLL-NO-VF-NEXT:    [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
-; UNROLL-NO-VF-NEXT:    [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, ptr [[TMP6]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sub nsw i32 [[TMP7]], [[VECTOR_RECUR]]
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sub nsw i32 [[TMP8]], [[TMP7]]
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp sgt i32 [[TMP10]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 0
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = select i1 [[TMP12]], i32 [[TMP10]], i32 0
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP13]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP14]]
+; UNROLL-NO-VF-NEXT:    [[TMP17]] = select i1 [[TMP15]], i32 [[VEC_PHI]], i32 [[TMP13]]
+; UNROLL-NO-VF-NEXT:    [[TMP18]] = select i1 [[TMP16]], i32 [[VEC_PHI1]], i32 [[TMP14]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
-; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
-; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]]
+; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]]
+; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP17]], i32 [[TMP18]]
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
@@ -372,12 +360,12 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; UNROLL-NO-VF-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
 ; UNROLL-NO-VF-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
 ; UNROLL-NO-VF:       scalar.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
-; UNROLL-NO-VF-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    [[TMP20]] = load i32, ptr [[ARRAYIDX]], align 4
+; UNROLL-NO-VF-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP20]], [[SCALAR_RECUR]]
 ; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
 ; UNROLL-NO-VF-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
@@ -392,8 +380,8 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; SINK-AFTER-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; SINK-AFTER-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; SINK-AFTER:       for.preheader:
-; SINK-AFTER-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
-; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 -1
+; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
@@ -407,23 +395,22 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; SINK-AFTER:       vector.body:
 ; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP10]]
-; SINK-AFTER-NEXT:    [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]]
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP5]], align 4
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP6]]
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> zeroinitializer
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP9]]
+; SINK-AFTER-NEXT:    [[TMP11]] = select <4 x i1> [[TMP10]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP9]]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SINK-AFTER:       middle.block:
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]])
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP11]])
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
@@ -431,21 +418,21 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
 ; SINK-AFTER:       scalar.ph:
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
-; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER:       for.cond.cleanup.loopexit:
-; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; SINK-AFTER:       for.cond.cleanup:
 ; SINK-AFTER-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
 ; SINK-AFTER-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
 ; SINK-AFTER:       scalar.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
-; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    [[TMP15]] = load i32, i32* [[ARRAYIDX]], align 4
-; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP15]], [[SCALAR_RECUR]]
+; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    [[TMP14]] = load i32, ptr [[ARRAYIDX]], align 4
+; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP14]], [[SCALAR_RECUR]]
 ; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
 ; SINK-AFTER-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
 ; SINK-AFTER-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
@@ -460,8 +447,8 @@ entry:
   br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
 
 for.preheader:
-  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
-  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
+  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, ptr %a, i64 -1
+  %.pre = load i32, ptr %arrayidx2.phi.trans.insert, align 4
   br label %scalar.body
 
 for.cond.cleanup.loopexit:
@@ -476,8 +463,8 @@ scalar.body:
   %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
   %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
   %minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
-  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx, align 4
   %sub3 = sub nsw i32 %1, %0
   %cmp4 = icmp sgt i32 %sub3, 0
   %cond = select i1 %cmp4, i32 %sub3, i32 0
@@ -489,7 +476,7 @@ scalar.body:
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
 }
 
-; void recurrence_3(short *a, double *b, int n, float f, short p) {
+; void recurrence_3(short *a, double *b, int n, float f, short p) {
 ;   b[0] = (double)a[0] - f * (double)p;
 ;   for (int i = 1; i < n; i++)
 ;     b[i] = (double)a[i] - f * (double)a[i - 1];
@@ -498,16 +485,16 @@ scalar.body:
 ; Check also that the casts were not moved needlessly.
 ;
 ;
-define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n, float %f, i16 %p) {
+define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float %f, i16 %p) {
 ; UNROLL-NO-IC-LABEL: @recurrence_3(
 ; UNROLL-NO-IC-NEXT:  entry:
-; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
 ; UNROLL-NO-IC-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
 ; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
 ; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
 ; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
-; UNROLL-NO-IC-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
+; UNROLL-NO-IC-NEXT:    store double [[SUB]], ptr [[B:%.*]], align 8
 ; UNROLL-NO-IC-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; UNROLL-NO-IC:       for.preheader:
@@ -532,35 +519,31 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP5]]
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP11]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = sitofp <4 x i16> [[TMP12]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double>
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = fmul fast <4 x double> [[TMP16]], [[BROADCAST_SPLAT]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = fmul fast <4 x double> [[TMP17]], [[BROADCAST_SPLAT3]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = fsub fast <4 x double> [[TMP14]], [[TMP18]]
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]]
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP5]]
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>*
-; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP20]], <4 x double>* [[TMP25]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>*
-; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP21]], <4 x double>* [[TMP27]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP5]]
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP9]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sitofp <4 x i16> [[TMP10]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = fmul fast <4 x double> [[TMP14]], [[BROADCAST_SPLAT]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = fmul fast <4 x double> [[TMP15]], [[BROADCAST_SPLAT3]]
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP16]]
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = fsub fast <4 x double> [[TMP13]], [[TMP17]]
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP5]]
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP18]], ptr [[TMP22]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP19]], ptr [[TMP23]], align 8
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
@@ -571,16 +554,16 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-IC:       scalar.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP29:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[TMP29]] = load i16, i16* [[ARRAYIDX5]], align 2
-; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP29]] to double
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
+; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i16, ptr [[ARRAYIDX5]], align 2
+; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP25]] to double
 ; UNROLL-NO-IC-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; UNROLL-NO-IC-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; UNROLL-NO-IC-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
-; UNROLL-NO-IC-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
+; UNROLL-NO-IC-NEXT:    store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
 ; UNROLL-NO-IC-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
 ; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -592,13 +575,13 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ;
 ; UNROLL-NO-VF-LABEL: @recurrence_3(
 ; UNROLL-NO-VF-NEXT:  entry:
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
 ; UNROLL-NO-VF-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
 ; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
 ; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
 ; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
-; UNROLL-NO-VF-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
+; UNROLL-NO-VF-NEXT:    store double [[SUB]], ptr [[B:%.*]], align 8
 ; UNROLL-NO-VF-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; UNROLL-NO-VF:       for.preheader:
@@ -614,47 +597,47 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP7]] = load i16, i16* [[TMP5]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sitofp i16 [[TMP6]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[TMP6]] to double
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = fmul fast double [[TMP10]], [[CONV1]]
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]]
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = fsub fast double [[TMP8]], [[TMP12]]
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]]
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    store double [[TMP14]], double* [[TMP16]], align 8
-; UNROLL-NO-VF-NEXT:    store double [[TMP15]], double* [[TMP17]], align 8
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP5]]
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i16, ptr [[TMP6]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP9]] = load i16, ptr [[TMP7]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sitofp i16 [[TMP8]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = sitofp i16 [[TMP8]] to double
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = fmul fast double [[TMP12]], [[CONV1]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fmul fast double [[TMP13]], [[CONV1]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = fsub fast double [[TMP10]], [[TMP14]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = fsub fast double [[TMP11]], [[TMP15]]
+; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
+; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP5]]
+; UNROLL-NO-VF-NEXT:    store double [[TMP16]], ptr [[TMP18]], align 8
+; UNROLL-NO-VF-NEXT:    store double [[TMP17]], ptr [[TMP19]], align 8
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF:       scalar.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2
-; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
+; UNROLL-NO-VF-NEXT:    [[TMP21]] = load i16, ptr [[ARRAYIDX5]], align 2
+; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP21]] to double
 ; UNROLL-NO-VF-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; UNROLL-NO-VF-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; UNROLL-NO-VF-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
-; UNROLL-NO-VF-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
+; UNROLL-NO-VF-NEXT:    store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
 ; UNROLL-NO-VF-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -666,13 +649,13 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ;
 ; SINK-AFTER-LABEL: @recurrence_3(
 ; SINK-AFTER-NEXT:  entry:
-; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; SINK-AFTER-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
 ; SINK-AFTER-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
 ; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
 ; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
 ; SINK-AFTER-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
-; SINK-AFTER-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
+; SINK-AFTER-NEXT:    store double [[SUB]], ptr [[B:%.*]], align 8
 ; SINK-AFTER-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
 ; SINK-AFTER-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; SINK-AFTER:       for.preheader:
@@ -694,22 +677,20 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double>
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = fmul fast <4 x double> [[TMP10]], [[BROADCAST_SPLAT]]
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP11]]
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]]
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, double* [[TMP13]], i32 0
-; SINK-AFTER-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; SINK-AFTER-NEXT:    store <4 x double> [[TMP12]], <4 x double>* [[TMP15]], align 8
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]]
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = sitofp <4 x i16> [[TMP7]] to <4 x double>
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = fmul fast <4 x double> [[TMP9]], [[BROADCAST_SPLAT]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = fsub fast <4 x double> [[TMP8]], [[TMP10]]
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]]
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i32 0
+; SINK-AFTER-NEXT:    store <4 x double> [[TMP11]], ptr [[TMP13]], align 8
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
@@ -720,16 +701,16 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
 ; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; SINK-AFTER:       scalar.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
-; SINK-AFTER-NEXT:    [[TMP17]] = load i16, i16* [[ARRAYIDX5]], align 2
-; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP17]] to double
+; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[ADVARS_IV]]
+; SINK-AFTER-NEXT:    [[TMP15]] = load i16, ptr [[ARRAYIDX5]], align 2
+; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP15]] to double
 ; SINK-AFTER-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
 ; SINK-AFTER-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
 ; SINK-AFTER-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
-; SINK-AFTER-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
-; SINK-AFTER-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
+; SINK-AFTER-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[ADVARS_IV]]
+; SINK-AFTER-NEXT:    store double [[SUB13]], ptr [[ARRAYIDX15]], align 8
 ; SINK-AFTER-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
 ; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
@@ -740,13 +721,13 @@ define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n,
 ; SINK-AFTER-NEXT:    ret void
 ;
 entry:
-  %0 = load i16, i16* %a, align 2
+  %0 = load i16, ptr %a, align 2
   %conv = sitofp i16 %0 to double
   %conv1 = fpext float %f to double
   %conv2 = sitofp i16 %p to double
   %mul = fmul fast double %conv2, %conv1
   %sub = fsub fast double %conv, %mul
-  store double %sub, double* %b, align 8
+  store double %sub, ptr %b, align 8
   %cmp25 = icmp sgt i32 %n, 1
   br i1 %cmp25, label %for.preheader, label %for.end
 
@@ -756,14 +737,14 @@ for.preheader:
 scalar.body:
   %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
   %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
-  %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
-  %2 = load i16, i16* %arrayidx5, align 2
+  %arrayidx5 = getelementptr inbounds i16, ptr %a, i64 %advars.iv
+  %2 = load i16, ptr %arrayidx5, align 2
   %conv6 = sitofp i16 %2 to double
   %conv11 = sitofp i16 %1 to double
   %mul12 = fmul fast double %conv11, %conv1
   %sub13 = fsub fast double %conv6, %mul12
-  %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
-  store double %sub13, double* %arrayidx15, align 8
+  %arrayidx15 = getelementptr inbounds double, ptr %b, i64 %advars.iv
+  store double %sub13, ptr %arrayidx15, align 8
   %advars.iv.next = add nuw nsw i64 %advars.iv, 1
   %lftr.wideiv = trunc i64 %advars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -785,20 +766,20 @@ for.end:
 ; }
 ;
 ;
-define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
+define void @PR26734(ptr %a, ptr %b, ptr %c, i32 %d, ptr %e) {
 ; UNROLL-NO-IC-LABEL: @PR26734(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
 ; UNROLL-NO-IC:       entry.for.end_crit_edge:
-; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
+; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_END:%.*]]
 ; UNROLL-NO-IC:       for.body.lr.ph:
-; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
 ; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
-; UNROLL-NO-IC-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
-; UNROLL-NO-IC-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
+; UNROLL-NO-IC-NEXT:    [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
+; UNROLL-NO-IC-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-IC:       for.body:
 ; UNROLL-NO-IC-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -810,9 +791,9 @@ define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
 ; UNROLL-NO-IC:       for.cond.for.end_crit_edge:
 ; UNROLL-NO-IC-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
-; UNROLL-NO-IC-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
-; UNROLL-NO-IC-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
+; UNROLL-NO-IC-NEXT:    store i32 [[CONV2]], ptr [[C]], align 4
+; UNROLL-NO-IC-NEXT:    store i32 [[AND_LCSSA]], ptr [[B]], align 4
+; UNROLL-NO-IC-NEXT:    store i16 [[SUB]], ptr [[E:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_END]]
 ; UNROLL-NO-IC:       for.end:
 ; UNROLL-NO-IC-NEXT:    ret void
@@ -822,14 +803,14 @@ define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
 ; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
 ; UNROLL-NO-VF:       entry.for.end_crit_edge:
-; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
+; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_END:%.*]]
 ; UNROLL-NO-VF:       for.body.lr.ph:
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
 ; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
-; UNROLL-NO-VF-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
-; UNROLL-NO-VF-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
+; UNROLL-NO-VF-NEXT:    [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
+; UNROLL-NO-VF-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-VF:       for.body:
 ; UNROLL-NO-VF-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -841,9 +822,9 @@ define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
 ; UNROLL-NO-VF:       for.cond.for.end_crit_edge:
 ; UNROLL-NO-VF-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
-; UNROLL-NO-VF-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
+; UNROLL-NO-VF-NEXT:    store i32 [[CONV2]], ptr [[C]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[AND_LCSSA]], ptr [[B]], align 4
+; UNROLL-NO-VF-NEXT:    store i16 [[SUB]], ptr [[E:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
@@ -853,14 +834,14 @@ define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
 ; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
 ; SINK-AFTER-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
 ; SINK-AFTER:       entry.for.end_crit_edge:
-; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
+; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[B:%.*]], align 4
 ; SINK-AFTER-NEXT:    br label [[FOR_END:%.*]]
 ; SINK-AFTER:       for.body.lr.ph:
-; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; SINK-AFTER-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
 ; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
-; SINK-AFTER-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
-; SINK-AFTER-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
+; SINK-AFTER-NEXT:    [[C_PROMOTED:%.*]] = load i32, ptr [[C:%.*]], align 4
+; SINK-AFTER-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr [[B]], align 4
 ; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
 ; SINK-AFTER:       for.body:
 ; SINK-AFTER-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -872,9 +853,9 @@ define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
 ; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
 ; SINK-AFTER:       for.cond.for.end_crit_edge:
 ; SINK-AFTER-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
-; SINK-AFTER-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
-; SINK-AFTER-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
-; SINK-AFTER-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
+; SINK-AFTER-NEXT:    store i32 [[CONV2]], ptr [[C]], align 4
+; SINK-AFTER-NEXT:    store i32 [[AND_LCSSA]], ptr [[B]], align 4
+; SINK-AFTER-NEXT:    store i16 [[SUB]], ptr [[E:%.*]], align 2
 ; SINK-AFTER-NEXT:    br label [[FOR_END]]
 ; SINK-AFTER:       for.end:
 ; SINK-AFTER-NEXT:    ret void
@@ -884,15 +865,15 @@ entry:
   br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
 
 entry.for.end_crit_edge:
-  %.pre = load i32, i32* %b, align 4
+  %.pre = load i32, ptr %b, align 4
   br label %for.end
 
 for.body.lr.ph:
-  %0 = load i16, i16* %a, align 2
+  %0 = load i16, ptr %a, align 2
   %sub = add i16 %0, -6
   %conv2 = sext i16 %sub to i32
-  %c.promoted = load i32, i32* %c, align 4
-  %b.promoted = load i32, i32* %b, align 4
+  %c.promoted = load i32, ptr %c, align 4
+  %b.promoted = load i32, ptr %b, align 4
   br label %for.body
 
 for.body:
@@ -906,9 +887,9 @@ for.body:
 
 for.cond.for.end_crit_edge:
   %and.lcssa = phi i32 [ %and, %for.body ]
-  store i32 %conv2, i32* %c, align 4
-  store i32 %and.lcssa, i32* %b, align 4
-  store i16 %sub, i16* %e, align 2
+  store i32 %conv2, ptr %c, align 4
+  store i32 %and.lcssa, ptr %b, align 4
+  store i16 %sub, ptr %e, align 2
   br label %for.end
 
 for.end:
@@ -994,18 +975,18 @@ define i32 @PR27246() {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[INDUCTION1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1]] = add i32 [[OFFSET_IDX]], -1
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i32 [[OFFSET_IDX]], -1
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[INDUCTION1]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1:%.*]]
 ; UNROLL-NO-VF:       for.cond.cleanup:
@@ -1018,7 +999,7 @@ define i32 @PR27246() {
 ; UNROLL-NO-VF-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP8:![0-9]+]]
 ; UNROLL-NO-VF:       for.cond.cleanup3:
-; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[INDUCTION]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
@@ -1100,7 +1081,7 @@ for.cond.cleanup3:
 }
 
 ;
-define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
+define void @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
 ; UNROLL-NO-IC-LABEL: @PR30183(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
@@ -1134,26 +1115,26 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add nuw nsw i64 [[TMP8]], 2
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add nuw nsw i64 [[TMP9]], 2
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = add nuw nsw i64 [[TMP10]], 2
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP12]]
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
-; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
-; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
-; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP19]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP20]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP21]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP22]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
+; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
+; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
+; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]]
+; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP19]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP20]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP21]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i32, ptr [[TMP22]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 1
 ; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 2
 ; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3
-; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load i32, i32* [[TMP23]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP24]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP25]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP23]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP24]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = load i32, ptr [[TMP25]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = load i32, ptr [[TMP26]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP35]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1
 ; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 2
@@ -1176,8 +1157,8 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
-; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
-; UNROLL-NO-IC-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
+; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
+; UNROLL-NO-IC-NEXT:    [[VAR2]] = load i32, ptr [[VAR1]], align 4
 ; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; UNROLL-NO-IC:       for.end:
@@ -1197,32 +1178,32 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, i32* [[TMP6]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP3]], 2
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP5]]
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP10]] = load i32, ptr [[TMP8]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
 ; UNROLL-NO-VF:       scalar.body:
 ; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
-; UNROLL-NO-VF-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
-; UNROLL-NO-VF-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
+; UNROLL-NO-VF-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
+; UNROLL-NO-VF-NEXT:    [[VAR2]] = load i32, ptr [[VAR1]], align 4
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
@@ -1253,14 +1234,14 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
 ; SINK-AFTER-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
 ; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
 ; SINK-AFTER-NEXT:    [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP7]]
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
-; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4
-; SINK-AFTER-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
-; SINK-AFTER-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP13]], align 4
-; SINK-AFTER-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP14]], align 4
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]]
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]]
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
+; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
+; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4
+; SINK-AFTER-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4
+; SINK-AFTER-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4
+; SINK-AFTER-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4
 ; SINK-AFTER-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
 ; SINK-AFTER-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
 ; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
@@ -1282,8 +1263,8 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
 ; SINK-AFTER-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
-; SINK-AFTER-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
-; SINK-AFTER-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
+; SINK-AFTER-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_NEXT]]
+; SINK-AFTER-NEXT:    [[VAR2]] = load i32, ptr [[VAR1]], align 4
 ; SINK-AFTER-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
 ; SINK-AFTER-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; SINK-AFTER:       for.end:
@@ -1296,8 +1277,8 @@ scalar.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
   %var0 = phi i32 [ %pre_load, %entry ], [ %var2, %scalar.body ]
   %i.next = add nuw nsw i64 %i, 2
-  %var1 = getelementptr inbounds i32, i32* %a, i64 %i.next
-  %var2 = load i32, i32* %var1
+  %var1 = getelementptr inbounds i32, ptr %a, i64 %i.next
+  %var2 = load i32, ptr %var1
   %cond = icmp eq i64 %i.next,%n
   br i1 %cond, label %for.end, label %scalar.body
 
@@ -1416,7 +1397,7 @@ for.end:
 ; the first order recurrence phi is used outside the loop, so we require the phi
 ; itself and not its update (addx).
 ; Check the case when unrolled but not vectorized.
-define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
+define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; UNROLL-NO-IC-LABEL: @extract_second_last_iteration(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -1467,19 +1448,19 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[INDUCTION]], [[X:%.*]]
-; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i32 [[INDUCTION1]], [[X]]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], [[X:%.*]]
+; UNROLL-NO-VF-NEXT:    [[TMP3]] = add i32 [[TMP1]], [[X]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-VF:       for.body:
@@ -1491,7 +1472,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
 ; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
-; UNROLL-NO-VF-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
 ;
 ; SINK-AFTER-LABEL: @extract_second_last_iteration(
@@ -1554,7 +1535,7 @@ for.end:
 ; insertelement of the last part UF - 1, assuming the latter appears after the
 ; insertelements of all other parts.
 ;
-; int PR33613(double *b, double j, int d) {
+; int PR33613(double *b, double j, int d) {
 ;   int a = 0;
 ;   for(int i = 0; i < 10240; i++, b+=25) {
 ;     double f = b[d]; // Scalarize to form insertelements
@@ -1566,13 +1547,13 @@ for.end:
 ; }
 ;
 ;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(ptr %b, double %j, i32 %d) {
 ; UNROLL-NO-IC-LABEL: @PR33613(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
-; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
+; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
@@ -1581,49 +1562,49 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
+; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
 ; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[TMP9]]
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 5
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[TMP11]]
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 6
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[TMP13]]
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
 ; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 7
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 25
-; UNROLL-NO-IC-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]]
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load double, double* [[TMP16]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load double, double* [[TMP17]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load double, double* [[TMP19]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 200
+; UNROLL-NO-IC-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP15]]
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP5]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP6]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP7]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP8]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load double, ptr [[TMP16]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load double, ptr [[TMP17]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = load double, ptr [[TMP18]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load double, ptr [[TMP19]], align 8
 ; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> poison, double [[TMP24]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i32 1
 ; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i32 2
 ; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3
-; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load double, double* [[TMP20]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load double, double* [[TMP21]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
-; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load double, double* [[TMP23]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load double, ptr [[TMP20]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load double, ptr [[TMP21]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = load double, ptr [[TMP22]], align 8
+; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load double, ptr [[TMP23]], align 8
 ; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = insertelement <4 x double> poison, double [[TMP32]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i32 1
 ; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i32 2
@@ -1650,7 +1631,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-IC:       scalar.ph:
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
-; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
@@ -1658,18 +1639,18 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-IC-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    ret i32 [[A_1_LCSSA]]
 ; UNROLL-NO-IC:       for.body:
-; UNROLL-NO-IC-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
-; UNROLL-NO-IC-NEXT:    [[TMP52]] = load double, double* [[ARRAYIDX]], align 8
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
+; UNROLL-NO-IC-NEXT:    [[TMP52]] = load double, ptr [[ARRAYIDX]], align 8
 ; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP52]]
 ; UNROLL-NO-IC-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
 ; UNROLL-NO-IC-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
 ; UNROLL-NO-IC-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
 ; UNROLL-NO-IC-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
-; UNROLL-NO-IC-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
+; UNROLL-NO-IC-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
 ; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ;
@@ -1678,7 +1659,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-VF-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
 ; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
-; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
+; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -1686,15 +1667,15 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-VF-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
-; UNROLL-NO-VF-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 200
+; UNROLL-NO-VF-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
-; UNROLL-NO-VF-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load double, double* [[TMP4]], align 8
-; UNROLL-NO-VF-NEXT:    [[TMP7]] = load double, double* [[TMP5]], align 8
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 200
+; UNROLL-NO-VF-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load double, ptr [[TMP4]], align 8
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = load double, ptr [[TMP5]], align 8
 ; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]]
 ; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]]
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00
@@ -1712,7 +1693,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
 ; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
-; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
@@ -1720,18 +1701,18 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; UNROLL-NO-VF-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    ret i32 [[A_1_LCSSA]]
 ; UNROLL-NO-VF:       for.body:
-; UNROLL-NO-VF-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
-; UNROLL-NO-VF-NEXT:    [[TMP17]] = load double, double* [[ARRAYIDX]], align 8
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
+; UNROLL-NO-VF-NEXT:    [[TMP17]] = load double, ptr [[ARRAYIDX]], align 8
 ; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]]
 ; UNROLL-NO-VF-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
 ; UNROLL-NO-VF-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
 ; UNROLL-NO-VF-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
 ; UNROLL-NO-VF-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
-; UNROLL-NO-VF-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
+; UNROLL-NO-VF-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ;
@@ -1740,7 +1721,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; SINK-AFTER-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
 ; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
-; SINK-AFTER-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
+; SINK-AFTER-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2048000
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
@@ -1748,25 +1729,25 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; SINK-AFTER-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
-; SINK-AFTER-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
+; SINK-AFTER-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 200
+; SINK-AFTER-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; SINK-AFTER-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
-; SINK-AFTER-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
+; SINK-AFTER-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 200
+; SINK-AFTER-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
-; SINK-AFTER-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 200
+; SINK-AFTER-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
 ; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
-; SINK-AFTER-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP8]], align 8
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
-; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load double, double* [[TMP11]], align 8
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 200
+; SINK-AFTER-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP]], i64 [[IDXPROM]]
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP2]], i64 [[IDXPROM]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP3]], i64 [[IDXPROM]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[NEXT_GEP4]], i64 [[IDXPROM]]
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load double, ptr [[TMP8]], align 8
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load double, ptr [[TMP9]], align 8
+; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load double, ptr [[TMP10]], align 8
+; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load double, ptr [[TMP11]], align 8
 ; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> poison, double [[TMP12]], i32 0
 ; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i32 1
 ; SINK-AFTER-NEXT:    [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 2
@@ -1787,7 +1768,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; SINK-AFTER:       scalar.ph:
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
-; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
+; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
@@ -1795,18 +1776,18 @@ define i32 @PR33613(double* %b, double %j, i32 %d) {
 ; SINK-AFTER-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    ret i32 [[A_1_LCSSA]]
 ; SINK-AFTER:       for.body:
-; SINK-AFTER-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[B_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
-; SINK-AFTER-NEXT:    [[TMP27]] = load double, double* [[ARRAYIDX]], align 8
+; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 [[IDXPROM]]
+; SINK-AFTER-NEXT:    [[TMP27]] = load double, ptr [[ARRAYIDX]], align 8
 ; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP27]]
 ; SINK-AFTER-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
 ; SINK-AFTER-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
 ; SINK-AFTER-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
 ; SINK-AFTER-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
-; SINK-AFTER-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
+; SINK-AFTER-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, ptr [[B_ADDR_012]], i64 25
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
 ; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ;
@@ -1819,33 +1800,33 @@ for.cond.cleanup:
   ret i32 %a.1.lcssa
 
 for.body:
-  %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
+  %b.addr.012 = phi ptr [ %b, %entry ], [ %add.ptr, %for.body ]
   %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
   %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
   %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
-  %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
-  %0 = load double, double* %arrayidx, align 8
+  %arrayidx = getelementptr inbounds double, ptr %b.addr.012, i64 %idxprom
+  %0 = load double, ptr %arrayidx, align 8
   %mul = fmul double %j.addr.09, %0
   %tobool = fcmp une double %mul, 0.000000e+00
   %inc = zext i1 %tobool to i32
   %a.1 = add nsw i32 %a.010, %inc
   %inc1 = add nuw nsw i32 %i.011, 1
-  %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
+  %add.ptr = getelementptr inbounds double, ptr %b.addr.012, i64 25
   %exitcond = icmp eq i32 %inc1, 10240
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
 ; void sink_after(short *a, int n, int *b) {
 ;   for(int i = 0; i < n; i++)
-;     b[i] = (a[i] * a[i + 1]);
+;     b[i] = (a[i] * a[i + 1]);
 ; }
 ;
 ; Check that the sext sank after the load in the vector loop.
 ;
-define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
+define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; UNROLL-NO-IC-LABEL: @sink_after(
 ; UNROLL-NO-IC-NEXT:  entry:
-; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
 ; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
@@ -1860,33 +1841,29 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP12]]
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]]
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP21]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP23]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP7]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP10]]
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]]
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP18]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP15]], ptr [[TMP19]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
@@ -1897,16 +1874,16 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-IC:       for.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i16, i16* [[ARRAYIDX2]], align 2
-; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP25]] to i32
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-IC-NEXT:    [[TMP21]] = load i16, ptr [[ARRAYIDX2]], align 2
+; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP21]] to i32
 ; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; UNROLL-NO-IC:       for.end:
@@ -1914,7 +1891,7 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ;
 ; UNROLL-NO-VF-LABEL: @sink_after(
 ; UNROLL-NO-VF-NEXT:  entry:
-; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
 ; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
@@ -1923,46 +1900,46 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = load i16, ptr [[TMP5]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP7]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], ptr [[TMP14]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], ptr [[TMP15]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-VF:       for.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-VF-NEXT:    [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2
-; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-VF-NEXT:    [[TMP17]] = load i16, ptr [[ARRAYIDX2]], align 2
+; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP17]] to i32
 ; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
@@ -1970,7 +1947,7 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ;
 ; SINK-AFTER-LABEL: @sink_after(
 ; SINK-AFTER-NEXT:  entry:
-; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
@@ -1983,21 +1960,19 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
-; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0
-; SINK-AFTER-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP6]]
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
+; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
+; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP5]]
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
@@ -2008,23 +1983,23 @@ define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
 ; SINK-AFTER:       for.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; SINK-AFTER-NEXT:    [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2
-; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
+; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; SINK-AFTER-NEXT:    [[TMP11]] = load i16, ptr [[ARRAYIDX2]], align 2
+; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP11]] to i32
 ; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; SINK-AFTER:       for.end:
 ; SINK-AFTER-NEXT:    ret void
 ;
 entry:
-  %.pre = load i16, i16* %a
+  %.pre = load i16, ptr %a
   br label %for.body
 
 for.body:
@@ -2032,12 +2007,12 @@ for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %conv = sext i16 %0 to i32
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
-  %1 = load i16, i16* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv.next
+  %1 = load i16, ptr %arrayidx2
   %conv3 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv3, %conv
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx5
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx5
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
 
@@ -2061,11 +2036,10 @@ for.end:
 ;
 ; Check that the sext sank after the load in the vector loop.
 ;
-define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i64 %n) {
+define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
 ; UNROLL-NO-IC-LABEL: @PR34711(
 ; UNROLL-NO-IC-NEXT:  entry:
-; UNROLL-NO-IC-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
-; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
+; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
 ; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
@@ -2075,7 +2049,7 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
@@ -2084,78 +2058,74 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP19]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP21]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1
-; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2
-; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3
-; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1
-; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2
-; UNROLL-NO-IC-NEXT:    [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3
-; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]]
-; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
-; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
-; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP4]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP5]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP18]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP19]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = load i16, ptr [[TMP12]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i16, ptr [[TMP13]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = insertelement <4 x i16> poison, i16 [[TMP20]], i32 0
+; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP21]], i32 1
+; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> [[TMP25]], i16 [[TMP22]], i32 2
+; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 3
+; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load i16, ptr [[TMP14]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load i16, ptr [[TMP15]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i16, ptr [[TMP16]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = load i16, ptr [[TMP17]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i32 0
+; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i32 1
+; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP30]], i32 2
+; UNROLL-NO-IC-NEXT:    [[TMP35]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 3
+; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP27]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = shufflevector <4 x i16> [[TMP27]], <4 x i16> [[TMP35]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = sext <4 x i16> [[TMP36]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = sext <4 x i16> [[TMP27]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = sext <4 x i16> [[TMP35]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]]
+; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = mul nsw <4 x i32> [[TMP41]], [[TMP39]]
+; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP4]]
+; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP46]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP47]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3
-; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2
+; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP35]], i32 3
+; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP35]], i32 2
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-IC:       scalar.ph:
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-IC:       for.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP49:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
-; UNROLL-NO-IC-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
+; UNROLL-NO-IC-NEXT:    store i32 7, ptr [[ARRAYCIDX]], align 4
 ; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
-; UNROLL-NO-IC-NEXT:    [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2
-; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP53]] to i32
+; UNROLL-NO-IC-NEXT:    [[TMP49]] = load i16, ptr [[CUR_INDEX]], align 2
+; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP49]] to i32
 ; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
@@ -2164,8 +2134,7 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ;
 ; UNROLL-NO-VF-LABEL: @PR34711(
 ; UNROLL-NO-VF-NEXT:  entry:
-; UNROLL-NO-VF-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
-; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
+; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
 ; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
@@ -2174,49 +2143,49 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION1]], i64 1
-; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP0]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP1]], align 4
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
+; UNROLL-NO-VF-NEXT:    store i32 7, ptr [[TMP2]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 7, ptr [[TMP3]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = load i16, ptr [[TMP5]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP7]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], ptr [[TMP14]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], ptr [[TMP15]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-VF:       for.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
-; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
+; UNROLL-NO-VF-NEXT:    store i32 7, ptr [[ARRAYCIDX]], align 4
 ; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2
-; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP17]] = load i16, ptr [[CUR_INDEX]], align 2
+; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP17]] to i32
 ; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -2225,8 +2194,7 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ;
 ; SINK-AFTER-LABEL: @PR34711(
 ; SINK-AFTER-NEXT:  entry:
-; SINK-AFTER-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
-; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
+; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
@@ -2236,59 +2204,57 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
 ; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
 ; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP10]], align 4
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2
-; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2
-; SINK-AFTER-NEXT:    [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0
-; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1
-; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2
-; SINK-AFTER-NEXT:    [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3
-; SINK-AFTER-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]]
-; SINK-AFTER-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; SINK-AFTER-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
-; SINK-AFTER-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP9]], align 4
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2
+; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2
+; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load i16, ptr [[TMP8]], align 2
+; SINK-AFTER-NEXT:    [[TMP14:%.*]] = insertelement <4 x i16> poison, i16 [[TMP10]], i32 0
+; SINK-AFTER-NEXT:    [[TMP15:%.*]] = insertelement <4 x i16> [[TMP14]], i16 [[TMP11]], i32 1
+; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 2
+; SINK-AFTER-NEXT:    [[TMP17]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 3
+; SINK-AFTER-NEXT:    [[TMP18:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP17]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]]
+; SINK-AFTER-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; SINK-AFTER-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> [[TMP21]], ptr [[TMP23]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3
-; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2
+; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP17]], i32 3
+; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP17]], i32 2
 ; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; SINK-AFTER:       scalar.ph:
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
 ; SINK-AFTER:       for.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
-; SINK-AFTER-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDVARS_IV]], i64 1
+; SINK-AFTER-NEXT:    store i32 7, ptr [[ARRAYCIDX]], align 4
 ; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
-; SINK-AFTER-NEXT:    [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2
-; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
+; SINK-AFTER-NEXT:    [[TMP25]] = load i16, ptr [[CUR_INDEX]], align 2
+; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP25]] to i32
 ; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
-; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
@@ -2297,22 +2263,21 @@ define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i6
 ;
 
 entry:
-  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
-  %.pre = load i16, i16* %pre.index
+  %.pre = load i16, ptr %a
   br label %for.body
 
 for.body:
   %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
-  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
+  %arraycidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  %cur.index = getelementptr inbounds [2 x i16], ptr %a, i64 %indvars.iv, i64 1
+  store i32 7, ptr %arraycidx   ; 1st instruction, to be widened.
   %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
-  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
+  %1 = load i16, ptr %cur.index ; 3rd, first-order-recurring load not widened.
   %conv3 = sext i16 %1 to i32
   %mul = mul nsw i32 %conv3, %conv
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx5
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx5
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
@@ -2328,10 +2293,10 @@ for.end:
 ;
 
 ;
-define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i64 %n) {
+define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users(
 ; UNROLL-NO-IC-NEXT:  entry:
-; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
 ; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
@@ -2346,35 +2311,31 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
-; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
-; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP12]], <i32 2, i32 2, i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 2, i32 2, i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP16]]
-; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP17]]
-; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP23]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* [[TMP25]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4
+; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP7]], align 2
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[TMP10]], <i32 2, i32 2, i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP11]], <i32 2, i32 2, i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
+; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP14]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP15]]
+; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP16]], ptr [[TMP20]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], ptr [[TMP21]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
@@ -2385,17 +2346,17 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-IC:       for.body:
-; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; UNROLL-NO-IC-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-IC-NEXT:    [[TMP27]] = load i16, i16* [[ARRAYIDX2]], align 2
-; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-IC-NEXT:    [[TMP23]] = load i16, ptr [[ARRAYIDX2]], align 2
+; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP23]] to i32
 ; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; UNROLL-NO-IC:       for.end:
@@ -2403,7 +2364,7 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ;
 ; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users(
 ; UNROLL-NO-VF-NEXT:  entry:
-; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
 ; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-VF:       vector.ph:
@@ -2412,49 +2373,49 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP6]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP4]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP5]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP8]], [[TMP10]]
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = mul nsw i32 [[TMP9]], [[TMP11]]
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], i32* [[TMP14]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], i32* [[TMP15]], align 4
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP2]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = load i16, ptr [[TMP5]], align 2
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP9]], 2
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sext i16 [[TMP6]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = sext i16 [[TMP7]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = mul nsw i32 [[TMP11]], [[TMP13]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP14]], ptr [[TMP16]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP15]], ptr [[TMP17]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL-NO-VF:       for.body:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; UNROLL-NO-VF-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; UNROLL-NO-VF-NEXT:    [[TMP17]] = load i16, i16* [[ARRAYIDX2]], align 2
-; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP17]] to i32
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; UNROLL-NO-VF-NEXT:    [[TMP19]] = load i16, ptr [[ARRAYIDX2]], align 2
+; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP19]] to i32
 ; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
@@ -2462,7 +2423,7 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ;
 ; SINK-AFTER-LABEL: @sink_after_with_multiple_users(
 ; SINK-AFTER-NEXT:  entry:
-; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
+; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2
 ; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
 ; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; SINK-AFTER:       vector.ph:
@@ -2475,22 +2436,20 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
-; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0
-; SINK-AFTER-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
-; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2>
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP8]]
-; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
-; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
-; SINK-AFTER-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4
+; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]]
+; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
+; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2
+; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 2, i32 2, i32 2, i32 2>
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; SINK-AFTER-NEXT:    [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP7]]
+; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
@@ -2501,24 +2460,24 @@ define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i6
 ; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
 ; SINK-AFTER:       for.body:
-; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
 ; SINK-AFTER-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
 ; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
-; SINK-AFTER-NEXT:    [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
-; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
+; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; SINK-AFTER-NEXT:    [[TMP12]] = load i16, ptr [[ARRAYIDX2]], align 2
+; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP12]] to i32
 ; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
-; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX5]], align 4
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; SINK-AFTER:       for.end:
 ; SINK-AFTER-NEXT:    ret void
 ;
 entry:
-  %.pre = load i16, i16* %a
+  %.pre = load i16, ptr %a
   br label %for.body
 
 for.body:
@@ -2527,12 +2486,12 @@ for.body:
   %conv = sext i16 %0 to i32
   %add = add nsw i32 %conv, 2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
-  %1 = load i16, i16* %arrayidx2
+  %arrayidx2 = getelementptr inbounds i16, ptr %a, i64 %indvars.iv.next
+  %1 = load i16, ptr %arrayidx2
   %conv3 = sext i16 %1 to i32
   %mul = mul nsw i32 %add, %conv3
-  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  store i32 %mul, i32* %arrayidx5
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  store i32 %mul, ptr %arrayidx5
   %exitcond = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond, label %for.end, label %for.body
 
@@ -2544,7 +2503,7 @@ for.end:
 ; sinking, instructions with side effects (e.g. loads) conditioned by those
 ; branches will become users of the condition bit after vectorization and would
 ; need to be sunk if the loop is vectorized.
-define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
+define void @do_not_sink_branch(i32 %x, ptr %in, ptr %out, i32 %tc) local_unnamed_addr #0 {
 ; UNROLL-NO-IC-LABEL: @do_not_sink_branch(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
@@ -2554,13 +2513,13 @@ define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unna
 ; UNROLL-NO-IC-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
 ; UNROLL-NO-IC-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
 ; UNROLL-NO-IC:       cond.true:
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[COND_END]]
 ; UNROLL-NO-IC:       cond.end:
 ; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
-; UNROLL-NO-IC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
-; UNROLL-NO-IC-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
+; UNROLL-NO-IC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
+; UNROLL-NO-IC-NEXT:    store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
 ; UNROLL-NO-IC-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
 ; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
@@ -2577,13 +2536,13 @@ define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unna
 ; UNROLL-NO-VF-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
 ; UNROLL-NO-VF:       cond.true:
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; UNROLL-NO-VF-NEXT:    br label [[COND_END]]
 ; UNROLL-NO-VF:       cond.end:
 ; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
-; UNROLL-NO-VF-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
-; UNROLL-NO-VF-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
+; UNROLL-NO-VF-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
+; UNROLL-NO-VF-NEXT:    store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
 ; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
@@ -2600,13 +2559,13 @@ define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unna
 ; SINK-AFTER-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
 ; SINK-AFTER-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
 ; SINK-AFTER:       cond.true:
-; SINK-AFTER-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i32 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; SINK-AFTER-NEXT:    br label [[COND_END]]
 ; SINK-AFTER:       cond.end:
 ; SINK-AFTER-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
-; SINK-AFTER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
-; SINK-AFTER-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
+; SINK-AFTER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 [[INDVARS_IV]]
+; SINK-AFTER-NEXT:    store i32 [[COND]], ptr [[ARRAYIDX8]], align 4
 ; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
 ; SINK-AFTER-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
 ; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
@@ -2624,14 +2583,14 @@ for.body4:                                        ; preds = %cond.end, %entry
   br i1 %cmp534, label %cond.true, label %cond.end
 
 cond.true:                                        ; preds = %for.body4
-  %arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
-  %in.val = load i32, i32* %arrayidx7, align 4
+  %arrayidx7 = getelementptr inbounds i32, ptr %in, i32 %indvars.iv
+  %in.val = load i32, ptr %arrayidx7, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %for.body4, %cond.true
   %cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
-  %arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
-  store i32 %cond, i32* %arrayidx8, align 4
+  %arrayidx8 = getelementptr inbounds i32, ptr %out, i32 %indvars.iv
+  store i32 %cond, ptr %arrayidx8, align 4
   %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
   %cmp5 = icmp slt i32 %indvars.iv.next, %tc
   %exitcond = icmp eq i32 %indvars.iv.next, %x
@@ -2701,35 +2660,35 @@ define void @sink_dead_inst() {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR1:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
-; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[TMP0]]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i16 [[OFFSET_IDX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i16 [[INDUCTION2]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP4]] = zext i16 [[TMP2]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add i16 [[TMP1]], 5
-; UNROLL-NO-VF-NEXT:    [[TMP6]] = add i16 [[TMP2]], 5
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR1:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i16 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP5]] = zext i16 [[TMP3]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = add i16 [[TMP2]], 5
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = add i16 [[TMP3]], 5
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 42
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT2:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[FOR_COND:%.*]]
 ; UNROLL-NO-VF:       for.cond:
 ; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
 ; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR3:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT2]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
 ; UNROLL-NO-VF-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
-; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR4]], 15
+; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR3]], 15
 ; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
 ; UNROLL-NO-VF-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
@@ -2946,41 +2905,41 @@ define i32 @sink_into_replication_region(i32 %y) {
 ; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
-; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE5:%.*]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_UDIV_CONTINUE5]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_UDIV_CONTINUE5]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE5]] ]
+; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE4]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_UDIV_CONTINUE4]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_UDIV_CONTINUE4]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
 ; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[VEC_IV2:%.*]] = add i32 [[INDEX]], 1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV2]], [[TRIP_COUNT_MINUS_1]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
 ; UNROLL-NO-VF:       pred.udiv.if:
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
 ; UNROLL-NO-VF:       pred.udiv.continue:
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]]
 ; UNROLL-NO-VF:       pred.udiv.if3:
-; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]]
-; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -1
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = udiv i32 219220132, [[TMP7]]
+; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
 ; UNROLL-NO-VF:       pred.udiv.continue4:
-; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF4]] ]
-; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
-; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI1]], [[TMP5]]
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI1]]
+; UNROLL-NO-VF-NEXT:    [[TMP9]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP8]], [[PRED_UDIV_IF3]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP10]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
+; UNROLL-NO-VF-NEXT:    [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP6]]
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP10]], i32 [[VEC_PHI]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP11]], i32 [[VEC_PHI1]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
-; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]]
+; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
 ; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[BB2:%.*]]
@@ -3104,7 +3063,7 @@ bb:
   br i1 %var9, label %bb1, label %bb2, !prof !2
 }
 
-define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
+define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ;
 ; CHECK-LABEL: @sink_into_replication_region_multiple(
 ; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple(
@@ -3212,64 +3171,64 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; UNROLL-NO-IC:       pred.store.if:
 ; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP49]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP2]], i32* [[TMP50]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP2]], ptr [[TMP50]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; UNROLL-NO-IC:       pred.store.continue:
 ; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP51]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
 ; UNROLL-NO-IC:       pred.store.if18:
 ; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = add i32 [[INDEX]], 1
-; UNROLL-NO-IC-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP52]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP3]], i32* [[TMP53]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP52]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP3]], ptr [[TMP53]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE19]]
 ; UNROLL-NO-IC:       pred.store.continue19:
 ; UNROLL-NO-IC-NEXT:    [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP54]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
 ; UNROLL-NO-IC:       pred.store.if20:
 ; UNROLL-NO-IC-NEXT:    [[TMP55:%.*]] = add i32 [[INDEX]], 2
-; UNROLL-NO-IC-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP55]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP4]], i32* [[TMP56]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP55]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP4]], ptr [[TMP56]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE21]]
 ; UNROLL-NO-IC:       pred.store.continue21:
 ; UNROLL-NO-IC-NEXT:    [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP57]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
 ; UNROLL-NO-IC:       pred.store.if22:
 ; UNROLL-NO-IC-NEXT:    [[TMP58:%.*]] = add i32 [[INDEX]], 3
-; UNROLL-NO-IC-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP58]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP5]], i32* [[TMP59]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP58]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP5]], ptr [[TMP59]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE23]]
 ; UNROLL-NO-IC:       pred.store.continue23:
 ; UNROLL-NO-IC-NEXT:    [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP60]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
 ; UNROLL-NO-IC:       pred.store.if24:
 ; UNROLL-NO-IC-NEXT:    [[TMP61:%.*]] = add i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP61]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP6]], i32* [[TMP62]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP61]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP6]], ptr [[TMP62]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE25]]
 ; UNROLL-NO-IC:       pred.store.continue25:
 ; UNROLL-NO-IC-NEXT:    [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP63]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
 ; UNROLL-NO-IC:       pred.store.if26:
 ; UNROLL-NO-IC-NEXT:    [[TMP64:%.*]] = add i32 [[INDEX]], 5
-; UNROLL-NO-IC-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP64]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP7]], i32* [[TMP65]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP64]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP7]], ptr [[TMP65]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE27]]
 ; UNROLL-NO-IC:       pred.store.continue27:
 ; UNROLL-NO-IC-NEXT:    [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP66]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
 ; UNROLL-NO-IC:       pred.store.if28:
 ; UNROLL-NO-IC-NEXT:    [[TMP67:%.*]] = add i32 [[INDEX]], 6
-; UNROLL-NO-IC-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP67]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP8]], i32* [[TMP68]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP67]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP8]], ptr [[TMP68]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE29]]
 ; UNROLL-NO-IC:       pred.store.continue29:
 ; UNROLL-NO-IC-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]]
 ; UNROLL-NO-IC:       pred.store.if30:
 ; UNROLL-NO-IC-NEXT:    [[TMP70:%.*]] = add i32 [[INDEX]], 7
-; UNROLL-NO-IC-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP70]]
-; UNROLL-NO-IC-NEXT:    store i32 [[TMP9]], i32* [[TMP71]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP70]]
+; UNROLL-NO-IC-NEXT:    store i32 [[TMP9]], ptr [[TMP71]], align 4
 ; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE31]]
 ; UNROLL-NO-IC:       pred.store.continue31:
 ; UNROLL-NO-IC-NEXT:    [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
@@ -3298,10 +3257,10 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
 ; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; UNROLL-NO-IC-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
+; UNROLL-NO-IC-NEXT:    [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
 ; UNROLL-NO-IC-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
 ; UNROLL-NO-IC-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
-; UNROLL-NO-IC-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
+; UNROLL-NO-IC-NEXT:    store i32 [[VAR3]], ptr [[G]], align 4
 ; UNROLL-NO-IC-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
 ; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; UNROLL-NO-IC-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
@@ -3321,55 +3280,55 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
-; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE10]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE10]] ]
-; UNROLL-NO-VF-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE10]] ]
+; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE7]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE7]] ]
+; UNROLL-NO-VF-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE7]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
-; UNROLL-NO-VF-NEXT:    [[INDUCTION4:%.*]] = add i32 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION5:%.*]] = add i32 [[OFFSET_IDX]], -1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
 ; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[VEC_IV6:%.*]] = add i32 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], [[TRIP_COUNT_MINUS_1]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
+; UNROLL-NO-VF-NEXT:    [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
 ; UNROLL-NO-VF:       pred.udiv.if:
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION4]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[TMP2]]
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
 ; UNROLL-NO-VF:       pred.udiv.continue:
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
+; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP5]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
 ; UNROLL-NO-VF:       pred.udiv.if4:
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION5]]
-; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = udiv i32 219220132, [[TMP3]]
+; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
 ; UNROLL-NO-VF:       pred.udiv.continue5:
-; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF7]] ]
-; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
-; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI2]], [[TMP5]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL-NO-VF-NEXT:    [[TMP9]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP8]], [[PRED_UDIV_IF4]] ]
+; UNROLL-NO-VF-NEXT:    [[TMP10]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
+; UNROLL-NO-VF-NEXT:    [[TMP11]] = add i32 [[VEC_PHI2]], [[TMP7]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; UNROLL-NO-VF:       pred.store.if:
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION4]], i32* [[TMP10]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP12]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP2]], ptr [[TMP13]], align 4
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; UNROLL-NO-VF:       pred.store.continue:
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
 ; UNROLL-NO-VF:       pred.store.if6:
-; UNROLL-NO-VF-NEXT:    [[INDUCTION3:%.*]] = add i32 [[INDEX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION3]]
-; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION5]], i32* [[TMP11]], align 4
-; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE10]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = add i32 [[INDEX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP14]]
+; UNROLL-NO-VF-NEXT:    store i32 [[TMP3]], ptr [[TMP15]], align 4
+; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; UNROLL-NO-VF:       pred.store.continue7:
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = select i1 [[TMP4]], i32 [[TMP10]], i32 [[VEC_PHI]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = select i1 [[TMP5]], i32 [[TMP11]], i32 [[VEC_PHI2]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25]], !llvm.loop [[LOOP29:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25]], !llvm.loop [[LOOP29:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
-; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
+; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]]
 ; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
@@ -3382,10 +3341,10 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
 ; UNROLL-NO-VF-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; UNROLL-NO-VF-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
+; UNROLL-NO-VF-NEXT:    [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
 ; UNROLL-NO-VF-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
 ; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
-; UNROLL-NO-VF-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 [[VAR3]], ptr [[G]], align 4
 ; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
 ; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
@@ -3455,32 +3414,32 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; SINK-AFTER-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; SINK-AFTER:       pred.store.if:
 ; SINK-AFTER-NEXT:    [[TMP26:%.*]] = add i32 [[INDEX]], 0
-; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP26]]
-; SINK-AFTER-NEXT:    store i32 [[TMP2]], i32* [[TMP27]], align 4
+; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
+; SINK-AFTER-NEXT:    store i32 [[TMP2]], ptr [[TMP27]], align 4
 ; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; SINK-AFTER:       pred.store.continue:
 ; SINK-AFTER-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
 ; SINK-AFTER-NEXT:    br i1 [[TMP28]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; SINK-AFTER:       pred.store.if8:
 ; SINK-AFTER-NEXT:    [[TMP29:%.*]] = add i32 [[INDEX]], 1
-; SINK-AFTER-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]]
-; SINK-AFTER-NEXT:    store i32 [[TMP3]], i32* [[TMP30]], align 4
+; SINK-AFTER-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP29]]
+; SINK-AFTER-NEXT:    store i32 [[TMP3]], ptr [[TMP30]], align 4
 ; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; SINK-AFTER:       pred.store.continue9:
 ; SINK-AFTER-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
 ; SINK-AFTER-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; SINK-AFTER:       pred.store.if10:
 ; SINK-AFTER-NEXT:    [[TMP32:%.*]] = add i32 [[INDEX]], 2
-; SINK-AFTER-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]]
-; SINK-AFTER-NEXT:    store i32 [[TMP4]], i32* [[TMP33]], align 4
+; SINK-AFTER-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP32]]
+; SINK-AFTER-NEXT:    store i32 [[TMP4]], ptr [[TMP33]], align 4
 ; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; SINK-AFTER:       pred.store.continue11:
 ; SINK-AFTER-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
 ; SINK-AFTER-NEXT:    br i1 [[TMP34]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13]]
 ; SINK-AFTER:       pred.store.if12:
 ; SINK-AFTER-NEXT:    [[TMP35:%.*]] = add i32 [[INDEX]], 3
-; SINK-AFTER-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]]
-; SINK-AFTER-NEXT:    store i32 [[TMP5]], i32* [[TMP36]], align 4
+; SINK-AFTER-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP35]]
+; SINK-AFTER-NEXT:    store i32 [[TMP5]], ptr [[TMP36]], align 4
 ; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE13]]
 ; SINK-AFTER:       pred.store.continue13:
 ; SINK-AFTER-NEXT:    [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
@@ -3507,10 +3466,10 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
 ; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
 ; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
 ; SINK-AFTER-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; SINK-AFTER-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
+; SINK-AFTER-NEXT:    [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]]
 ; SINK-AFTER-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
 ; SINK-AFTER-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
-; SINK-AFTER-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
+; SINK-AFTER-NEXT:    store i32 [[VAR3]], ptr [[G]], align 4
 ; SINK-AFTER-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
 ; SINK-AFTER-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
 ; SINK-AFTER-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
@@ -3528,10 +3487,10 @@ bb:
   %iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
   %var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
   %var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
-  %g = getelementptr inbounds i32, i32* %x, i32 %iv
+  %g = getelementptr inbounds i32, ptr %x, i32 %iv
   %var6 = add i32 %var5, %var4
   %var7 = udiv i32 219220132, %var3
-  store i32 %var3, i32* %g, align 4
+  store i32 %var3, ptr %g, align 4
   %var8 = add nsw i32 %var3, -1
   %iv.next = add nsw i32 %iv, 1
   %var9 = icmp slt i32 %var3, 2
@@ -3540,7 +3499,7 @@ bb:
 
 ; %vec.dead will be marked as dead instruction in the vector loop and no recipe
 ; will be created for it. Make sure a valid sink target is used.
-define void @sink_after_dead_inst(i32* %A.ptr) {
+define void @sink_after_dead_inst(ptr %A.ptr) {
 ; UNROLL-NO-IC-LABEL: @sink_after_dead_inst(
 ; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -3562,18 +3521,16 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; UNROLL-NO-IC-NEXT:    [[TMP7]] = zext <4 x i16> [[TMP5]] to <4 x i32>
 ; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
-; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[TMP1]]
-; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP13]], align 4
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[TMP10]], i32 4
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
-; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP15]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
+; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
+; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
+; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP12]], align 4
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP13]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
-; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
+; UNROLL-NO-IC-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
 ; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
@@ -3594,8 +3551,8 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; UNROLL-NO-IC-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
 ; UNROLL-NO-IC-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
 ; UNROLL-NO-IC-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
-; UNROLL-NO-IC-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
-; UNROLL-NO-IC-NEXT:    store i32 0, i32* [[A_GEP]], align 4
+; UNROLL-NO-IC-NEXT:    [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
+; UNROLL-NO-IC-NEXT:    store i32 0, ptr [[A_GEP]], align 4
 ; UNROLL-NO-IC-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
 ; UNROLL-NO-IC:       for.end:
 ; UNROLL-NO-IC-NEXT:    ret void
@@ -3607,28 +3564,28 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-VF:       vector.body:
 ; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
-; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
-; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i16 [[INDUCTION]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION1]], 1
-; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = or i16 [[TMP0]], [[TMP0]]
-; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP5]] = zext i16 [[TMP3]] to i32
-; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[INDUCTION]]
-; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[INDUCTION1]]
-; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP6]], align 4
-; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP7]], align 4
+; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i16 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = or i16 [[TMP2]], [[TMP2]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = or i16 [[TMP3]], [[TMP3]]
+; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP7]] = zext i16 [[TMP5]] to i32
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
+; UNROLL-NO-VF-NEXT:    store i32 0, ptr [[TMP8]], align 4
+; UNROLL-NO-VF-NEXT:    store i32 0, ptr [[TMP9]], align 4
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; UNROLL-NO-VF-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
+; UNROLL-NO-VF-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
 ; UNROLL-NO-VF:       middle.block:
 ; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
 ; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; UNROLL-NO-VF:       scalar.ph:
-; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; UNROLL-NO-VF-NEXT:    br label [[LOOP:%.*]]
 ; UNROLL-NO-VF:       loop:
@@ -3642,8 +3599,8 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; UNROLL-NO-VF-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
 ; UNROLL-NO-VF-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
 ; UNROLL-NO-VF-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
-; UNROLL-NO-VF-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
-; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[A_GEP]], align 4
+; UNROLL-NO-VF-NEXT:    [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
+; UNROLL-NO-VF-NEXT:    store i32 0, ptr [[A_GEP]], align 4
 ; UNROLL-NO-VF-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP32:![0-9]+]]
 ; UNROLL-NO-VF:       for.end:
 ; UNROLL-NO-VF-NEXT:    ret void
@@ -3663,14 +3620,13 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
 ; SINK-AFTER-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[TMP5]], i32 0
-; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
-; SINK-AFTER-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP7]], align 4
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
+; SINK-AFTER-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
-; SINK-AFTER-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; SINK-AFTER-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; SINK-AFTER-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
+; SINK-AFTER-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; SINK-AFTER:       middle.block:
 ; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
@@ -3691,8 +3647,8 @@ define void @sink_after_dead_inst(i32* %A.ptr) {
 ; SINK-AFTER-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
 ; SINK-AFTER-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
 ; SINK-AFTER-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
-; SINK-AFTER-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
-; SINK-AFTER-NEXT:    store i32 0, i32* [[A_GEP]], align 4
+; SINK-AFTER-NEXT:    [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]]
+; SINK-AFTER-NEXT:    store i32 0, ptr [[A_GEP]], align 4
 ; SINK-AFTER-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
 ; SINK-AFTER:       for.end:
 ; SINK-AFTER-NEXT:    ret void
@@ -3712,8 +3668,8 @@ loop:
   %for.prev = zext i16 %B1 to i32
 
   %ext = zext i1 %B3 to i32
-  %A.gep = getelementptr i32, i32* %A.ptr, i16 %iv
-  store i32 0, i32* %A.gep
+  %A.gep = getelementptr i32, ptr %A.ptr, i16 %iv
+  store i32 0, ptr %A.gep
   br i1 %vec.dead, label %for.end, label %loop
 
 for.end:

diff  --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll
index abf294273ddbc..01affc1a689f2 100644
--- a/llvm/test/Transforms/LoopVectorize/global_alias.ll
+++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll
@@ -8,8 +8,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @Foo = common global %struct.anon zeroinitializer, align 4
 @Bar = common global %struct.anon.0 zeroinitializer, align 4
 
-@PB = external global i32*
-@PA = external global i32*
+@PB = external global ptr
+@PA = external global ptr
 
 
 ;; === First, the tests that should always vectorize, whether statically or by adding run-time checks ===
@@ -30,36 +30,36 @@ define i32 @noAlias01(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
-  %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
-  store i32 %add, i32* %arrayidx1, align 4
+  %4 = load i32, ptr %i, align 4
+  %arrayidx1 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4
+  store i32 %add, ptr %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx2, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx2, align 4
   ret i32 %7
 }
 
@@ -78,37 +78,37 @@ define i32 @noAlias02(i32 %a) {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 90
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %add = add nsw i32 %1, 10
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %add
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add1 = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
-  store i32 %add1, i32* %arrayidx2, align 4
+  %4 = load i32, ptr %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4
+  store i32 %add1, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -127,37 +127,37 @@ define i32 @noAlias03(i32 %a) {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %add1 = add nsw i32 %4, 10
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add1
+  store i32 %add, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -179,39 +179,39 @@ define i32 @noAlias04(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32*, i32** @PB, align 4
-  %2 = load i32, i32* %i, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %1, i32 %2
-  %3 = load i32, i32* %add.ptr, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %1 = load ptr, ptr @PB, align 4
+  %2 = load i32, ptr %i, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %1, i32 %2
+  %3 = load i32, ptr %add.ptr, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32*, i32** @PA, align 4
-  %6 = load i32, i32* %i, align 4
-  %add.ptr1 = getelementptr inbounds i32, i32* %5, i32 %6
-  store i32 %add, i32* %add.ptr1, align 4
+  %5 = load ptr, ptr @PA, align 4
+  %6 = load i32, ptr %i, align 4
+  %add.ptr1 = getelementptr inbounds i32, ptr %5, i32 %6
+  store i32 %add, ptr %add.ptr1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32*, i32** @PA, align 4
-  %9 = load i32, i32* %a.addr, align 4
-  %add.ptr2 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32, i32* %add.ptr2, align 4
+  %8 = load ptr, ptr @PA, align 4
+  %9 = load i32, ptr %a.addr, align 4
+  %add.ptr2 = getelementptr inbounds i32, ptr %8, i32 %9
+  %10 = load i32, ptr %add.ptr2, align 4
   ret i32 %10
 }
 
@@ -231,43 +231,43 @@ entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
   %N = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 10, i32* %N, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 10, ptr %N, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
-  %2 = load i32, i32* %N, align 4
-  %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
-  %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
-  %3 = load i32, i32* %arrayidx1, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %i, align 4
+  %2 = load i32, ptr %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr getelementptr inbounds (%struct.anon.0, ptr @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx1 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %1
+  %3 = load i32, ptr %arrayidx1, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32, i32* %i, align 4
-  %6 = load i32, i32* %N, align 4
-  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx2, i32 0, i32 %5
-  store i32 %add, i32* %arrayidx3, align 4
+  %5 = load i32, ptr %i, align 4
+  %6 = load i32, ptr %N, align 4
+  %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr %arrayidx2, i32 0, i32 %5
+  store i32 %add, ptr %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32, i32* %a.addr, align 4
-  %9 = load i32, i32* %N, align 4
-  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
-  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx4, i32 0, i32 %8
-  %10 = load i32, i32* %arrayidx5, align 4
+  %8 = load i32, ptr %a.addr, align 4
+  %9 = load i32, ptr %N, align 4
+  %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9
+  %arrayidx5 = getelementptr inbounds [100 x i32], ptr %arrayidx4, i32 0, i32 %8
+  %10 = load i32, ptr %arrayidx5, align 4
   ret i32 %10
 }
 
@@ -287,44 +287,44 @@ entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
   %N = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 10, i32* %N, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 10, ptr %N, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
-  %2 = load i32, i32* %N, align 4
+  %1 = load i32, ptr %i, align 4
+  %2 = load i32, ptr %N, align 4
   %add = add nsw i32 %2, 1
-  %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
-  %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
-  %3 = load i32, i32* %arrayidx1, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %add
+  %arrayidx1 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %1
+  %3 = load i32, ptr %arrayidx1, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add2 = add nsw i32 %3, %4
-  %5 = load i32, i32* %i, align 4
-  %6 = load i32, i32* %N, align 4
-  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx3, i32 0, i32 %5
-  store i32 %add2, i32* %arrayidx4, align 4
+  %5 = load i32, ptr %i, align 4
+  %6 = load i32, ptr %N, align 4
+  %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr %arrayidx3, i32 0, i32 %5
+  store i32 %add2, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32, i32* %a.addr, align 4
-  %9 = load i32, i32* %N, align 4
-  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
-  %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %8
-  %10 = load i32, i32* %arrayidx6, align 4
+  %8 = load i32, ptr %a.addr, align 4
+  %9 = load i32, ptr %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9
+  %arrayidx6 = getelementptr inbounds [100 x i32], ptr %arrayidx5, i32 0, i32 %8
+  %10 = load i32, ptr %arrayidx6, align 4
   ret i32 %10
 }
 
@@ -342,40 +342,40 @@ define i32 @noAlias07(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3
+  store i32 %add, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx5, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -394,40 +394,40 @@ define i32 @noAlias08(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 90
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3
+  store i32 %add, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx5, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -446,40 +446,40 @@ define i32 @noAlias09(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 10
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3
+  store i32 %add, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx5, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -501,45 +501,45 @@ define i32 @noAlias10(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32*, i32** @PB, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
-  %2 = load i32, i32* %i, align 4
+  %1 = load ptr, ptr @PB, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %1, i32 100
+  %2 = load i32, ptr %i, align 4
   %idx.neg = sub i32 0, %2
-  %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
-  %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
-  %3 = load i32, i32* %add.ptr2, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32, ptr %add.ptr1, i32 -1
+  %3 = load i32, ptr %add.ptr2, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32*, i32** @PA, align 4
-  %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 100
-  %6 = load i32, i32* %i, align 4
+  %5 = load ptr, ptr @PA, align 4
+  %add.ptr3 = getelementptr inbounds i32, ptr %5, i32 100
+  %6 = load i32, ptr %i, align 4
   %idx.neg4 = sub i32 0, %6
-  %add.ptr5 = getelementptr inbounds i32, i32* %add.ptr3, i32 %idx.neg4
-  %add.ptr6 = getelementptr inbounds i32, i32* %add.ptr5, i32 -1
-  store i32 %add, i32* %add.ptr6, align 4
+  %add.ptr5 = getelementptr inbounds i32, ptr %add.ptr3, i32 %idx.neg4
+  %add.ptr6 = getelementptr inbounds i32, ptr %add.ptr5, i32 -1
+  store i32 %add, ptr %add.ptr6, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32*, i32** @PA, align 4
-  %9 = load i32, i32* %a.addr, align 4
-  %add.ptr7 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32, i32* %add.ptr7, align 4
+  %8 = load ptr, ptr @PA, align 4
+  %9 = load i32, ptr %a.addr, align 4
+  %add.ptr7 = getelementptr inbounds i32, ptr %8, i32 %9
+  %10 = load i32, ptr %add.ptr7, align 4
   ret i32 %10
 }
 
@@ -559,47 +559,47 @@ entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
   %N = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 10, i32* %N, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 10, ptr %N, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %2 = load i32, i32* %N, align 4
-  %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
-  %3 = load i32, i32* %arrayidx2, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %2 = load i32, ptr %N, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr getelementptr inbounds (%struct.anon.0, ptr @Bar, i32 0, i32 2), i32 0, i32 %2
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %sub1
+  %3 = load i32, ptr %arrayidx2, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %sub3 = sub nsw i32 100, %5
   %sub4 = sub nsw i32 %sub3, 1
-  %6 = load i32, i32* %N, align 4
-  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
-  %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %sub4
-  store i32 %add, i32* %arrayidx6, align 4
+  %6 = load i32, ptr %N, align 4
+  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6
+  %arrayidx6 = getelementptr inbounds [100 x i32], ptr %arrayidx5, i32 0, i32 %sub4
+  store i32 %add, ptr %arrayidx6, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32, i32* %a.addr, align 4
-  %9 = load i32, i32* %N, align 4
-  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
-  %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx7, i32 0, i32 %8
-  %10 = load i32, i32* %arrayidx8, align 4
+  %8 = load i32, ptr %a.addr, align 4
+  %9 = load i32, ptr %N, align 4
+  %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9
+  %arrayidx8 = getelementptr inbounds [100 x i32], ptr %arrayidx7, i32 0, i32 %8
+  %10 = load i32, ptr %arrayidx8, align 4
   ret i32 %10
 }
 
@@ -619,48 +619,48 @@ entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
   %N = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 10, i32* %N, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 10, ptr %N, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %2 = load i32, i32* %N, align 4
+  %2 = load i32, ptr %N, align 4
   %add = add nsw i32 %2, 1
-  %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
-  %3 = load i32, i32* %arrayidx2, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %add
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %sub1
+  %3 = load i32, ptr %arrayidx2, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add3 = add nsw i32 %3, %4
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %sub4 = sub nsw i32 100, %5
   %sub5 = sub nsw i32 %sub4, 1
-  %6 = load i32, i32* %N, align 4
-  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
-  %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx6, i32 0, i32 %sub5
-  store i32 %add3, i32* %arrayidx7, align 4
+  %6 = load i32, ptr %N, align 4
+  %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6
+  %arrayidx7 = getelementptr inbounds [100 x i32], ptr %arrayidx6, i32 0, i32 %sub5
+  store i32 %add3, ptr %arrayidx7, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32, i32* %a.addr, align 4
-  %9 = load i32, i32* %N, align 4
-  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
-  %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx8, i32 0, i32 %8
-  %10 = load i32, i32* %arrayidx9, align 4
+  %8 = load i32, ptr %a.addr, align 4
+  %9 = load i32, ptr %N, align 4
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9
+  %arrayidx9 = getelementptr inbounds [100 x i32], ptr %arrayidx8, i32 0, i32 %8
+  %10 = load i32, ptr %arrayidx9, align 4
   ret i32 %10
 }
 
@@ -679,37 +679,37 @@ define i32 @noAlias13(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %add = add nsw i32 %1, 4
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add1 = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
-  store i32 %add1, i32* %arrayidx2, align 4
+  %4 = load i32, ptr %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4
+  store i32 %add1, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -728,40 +728,40 @@ define i32 @noAlias14(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 5
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
-  store i32 %add, i32* %arrayidx4, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3
+  store i32 %add, ptr %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx5, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -784,38 +784,38 @@ define i32 @mayAlias01(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
-  store i32 %add, i32* %arrayidx2, align 4
+  %4 = load i32, ptr %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4
+  store i32 %add, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -834,38 +834,38 @@ define i32 @mayAlias02(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %1 = load i32, ptr %i, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %4
   %sub1 = sub nsw i32 %sub, 1
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
-  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1
+  store i32 %add, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -884,42 +884,42 @@ define i32 @mayAlias03(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32*, i32** @PB, align 4
-  %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
-  %2 = load i32, i32* %i, align 4
+  %1 = load ptr, ptr @PB, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %1, i32 100
+  %2 = load i32, ptr %i, align 4
   %idx.neg = sub i32 0, %2
-  %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
-  %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
-  %3 = load i32, i32* %add.ptr2, align 4
-  %4 = load i32, i32* %a.addr, align 4
+  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %idx.neg
+  %add.ptr2 = getelementptr inbounds i32, ptr %add.ptr1, i32 -1
+  %3 = load i32, ptr %add.ptr2, align 4
+  %4 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32*, i32** @PA, align 4
-  %6 = load i32, i32* %i, align 4
-  %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6
-  store i32 %add, i32* %add.ptr3, align 4
+  %5 = load ptr, ptr @PA, align 4
+  %6 = load i32, ptr %i, align 4
+  %add.ptr3 = getelementptr inbounds i32, ptr %5, i32 %6
+  store i32 %add, ptr %add.ptr3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32, i32* %i, align 4
+  %7 = load i32, ptr %i, align 4
   %inc = add nsw i32 %7, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32*, i32** @PA, align 4
-  %9 = load i32, i32* %a.addr, align 4
-  %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32, i32* %add.ptr4, align 4
+  %8 = load ptr, ptr @PA, align 4
+  %9 = load i32, ptr %a.addr, align 4
+  %add.ptr4 = getelementptr inbounds i32, ptr %8, i32 %9
+  %10 = load i32, ptr %add.ptr4, align 4
   ret i32 %10
 }
 
@@ -941,39 +941,39 @@ define i32 @mustAlias01(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %add2 = add nsw i32 %4, 10
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add2
+  store i32 %add, ptr %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx4, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx4, align 4
   ret i32 %7
 }
 
@@ -991,38 +991,38 @@ define i32 @mustAlias02(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
-  %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
-  store i32 %add, i32* %arrayidx2, align 4
+  %4 = load i32, ptr %i, align 4
+  %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4
+  store i32 %add, ptr %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx3, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -1040,38 +1040,38 @@ define i32 @mustAlias03(i32 %a) nounwind {
 entry:
   %a.addr = alloca i32, align 4
   %i = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store i32 0, i32* %i, align 4
+  store i32 %a, ptr %a.addr, align 4
+  store i32 0, ptr %i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32, i32* %i, align 4
+  %0 = load i32, ptr %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32, i32* %i, align 4
+  %1 = load i32, ptr %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
-  %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32, i32* %arrayidx, align 4
-  %3 = load i32, i32* %a.addr, align 4
+  %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1
+  %2 = load i32, ptr %arrayidx, align 4
+  %3 = load i32, ptr %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32, i32* %i, align 4
+  %4 = load i32, ptr %i, align 4
   %add2 = add nsw i32 %4, 10
-  %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
-  store i32 %add, i32* %arrayidx3, align 4
+  %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add2
+  store i32 %add, ptr %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32, i32* %i, align 4
+  %5 = load i32, ptr %i, align 4
   %inc = add nsw i32 %5, 1
-  store i32 %inc, i32* %i, align 4
+  store i32 %inc, ptr %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32, i32* %a.addr, align 4
-  %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon, %struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32, i32* %arrayidx4, align 4
+  %6 = load i32, ptr %a.addr, align 4
+  %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6
+  %7 = load i32, ptr %arrayidx4, align 4
   ret i32 %7
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
index 6a02a371eafb1..0d5d6db39c7cd 100644
--- a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
@@ -4,17 +4,17 @@
 
 @f = external dso_local global i32, align 4
 
-define void @int_iv_based_on_pointer_iv(i8* %A) {
+define void @int_iv_based_on_pointer_iv(ptr %A) {
 ; VF1-LABEL: @int_iv_based_on_pointer_iv(
 ; VF1:       vector.body:
 ; VF1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
 ; VF1-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; VF1-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF1-NEXT:    [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[INDUCTION]]
-; VF1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[INDUCTION3]]
-; VF1-NEXT:    store i8 0, i8* [[TMP7]], align 1
-; VF1-NEXT:    store i8 0, i8* [[TMP8]], align 1
+; VF1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDUCTION]]
+; VF1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDUCTION3]]
+; VF1-NEXT:    store i8 0, ptr [[TMP7]], align 1
+; VF1-NEXT:    store i8 0, ptr [[TMP8]], align 1
 ; VF1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VF1-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]],
 ; VF1-NEXT:    br i1 [[TMP13]], label %middle.block, label %vector.body
@@ -25,10 +25,10 @@ define void @int_iv_based_on_pointer_iv(i8* %A) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; VF2-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
-; VF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]]
-; VF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP4]]
-; VF2-NEXT:    store i8 0, i8* [[TMP9]], align 1
-; VF2-NEXT:    store i8 0, i8* [[TMP10]], align 1
+; VF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
+; VF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
+; VF2-NEXT:    store i8 0, ptr [[TMP9]], align 1
+; VF2-NEXT:    store i8 0, ptr [[TMP10]], align 1
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VF2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]],
 ; VF2-NEXT:    br i1 [[TMP14]], label %middle.block, label %vector.body
@@ -38,12 +38,12 @@ entry:
 
 loop:
   %iv.int = phi i64 [ 0, %entry ], [ %iv.int.next, %loop ]
-  %iv.ptr = phi i32* [ null, %entry ], [ %iv.ptr.next, %loop ]
-  %iv.ptr.next = getelementptr inbounds i32, i32* %iv.ptr, i64 1
-  %gep.A = getelementptr inbounds i8, i8* %A, i64 %iv.int
-  store i8 0, i8* %gep.A
-  %iv.int.next = ptrtoint i32* %iv.ptr.next to i64
-  %sub.ptr.sub = sub i64 ptrtoint (i32* @f to i64), %iv.int.next
+  %iv.ptr = phi ptr [ null, %entry ], [ %iv.ptr.next, %loop ]
+  %iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i64 1
+  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.int
+  store i8 0, ptr %gep.A
+  %iv.int.next = ptrtoint ptr %iv.ptr.next to i64
+  %sub.ptr.sub = sub i64 ptrtoint (ptr @f to i64), %iv.int.next
   %cmp = icmp sgt i64 %sub.ptr.sub, 0
   br i1 %cmp, label %loop, label %exit
 

diff  --git a/llvm/test/Transforms/LoopVectorize/nsw-crash.ll b/llvm/test/Transforms/LoopVectorize/nsw-crash.ll
index 12ff90aa2b859..815018bc42a8f 100644
--- a/llvm/test/Transforms/LoopVectorize/nsw-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -11,10 +11,10 @@ while.body.lr.ph:
   br label %while.body
 
 while.body:
-  %it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
-  %incdec.ptr.i = getelementptr inbounds i32, i32* %it.sroa.0.091, i64 1
+  %it.sroa.0.091 = phi ptr [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
+  %incdec.ptr.i = getelementptr inbounds i32, ptr %it.sroa.0.091, i64 1
   %inc32 = add i32 undef, 1                                        ; <------------- Make sure we don't set NSW flags to the undef.
-  %cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
+  %cmp.i11 = icmp eq ptr %incdec.ptr.i, undef
   br i1 %cmp.i11, label %while.end, label %while.body
 
 while.end:

diff  --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 8e7eb451a25eb..1e90db097446a 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -4,11 +4,11 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 
 
 ; Function Attrs: nofree norecurse nounwind
-define void @a(i8* readnone %b) {
+define void @a(ptr readnone %b) {
 ; CHECK-LABEL: @a(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B1:%.*]] = ptrtoint i8* [[B:%.*]] to i64
-; CHECK-NEXT:    [[CMP_NOT4:%.*]] = icmp eq i8* [[B]], null
+; CHECK-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT:    [[CMP_NOT4:%.*]] = icmp eq ptr [[B]], null
 ; CHECK-NEXT:    br i1 [[CMP_NOT4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[B1]]
@@ -18,87 +18,86 @@ define void @a(i8* readnone %b) {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], -1
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, i8* null, i64 [[TMP1]]
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP1]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], -1
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* null, i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 -3
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0
-; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i64 -1
-; CHECK-NEXT:    store i8 95, i8* [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
+; CHECK-NEXT:    store i8 95, ptr [[TMP10]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1
-; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
+; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
 ; CHECK:       pred.store.if5:
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], -1
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, i8* null, i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP2]], i64 -1
-; CHECK-NEXT:    store i8 95, i8* [[TMP15]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], -1
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1
+; CHECK-NEXT:    store i8 95, ptr [[TMP14]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2
-; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
+; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; CHECK:       pred.store.if7:
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], -1
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* null, i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP3]], i64 -1
-; CHECK-NEXT:    store i8 95, i8* [[TMP19]], align 1
+; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], -1
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1
+; CHECK-NEXT:    store i8 95, ptr [[TMP18]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; CHECK:       pred.store.continue8:
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3
-; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
+; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
 ; CHECK:       pred.store.if9:
-; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], -1
-; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* null, i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP4]], i64 -1
-; CHECK-NEXT:    store i8 95, i8* [[TMP23]], align 1
+; CHECK-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], -1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1
+; CHECK-NEXT:    store i8 95, ptr [[TMP22]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; CHECK:       pred.store.continue10:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ null, [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup.loopexit:
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[C_05:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[C_05]], i64 -1
-; CHECK-NEXT:    [[TMP25:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP25]], 0
+; CHECK-NEXT:    [[C_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[C_05]], i64 -1
+; CHECK-NEXT:    [[TMP24:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP24]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store i8 95, i8* [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    store i8 95, ptr [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[B]]
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[B]]
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 
 entry:
-  %cmp.not4 = icmp eq i8* %b, null
+  %cmp.not4 = icmp eq ptr %b, null
   br i1 %cmp.not4, label %for.cond.cleanup, label %for.body.preheader
 
 for.body.preheader:                               ; preds = %entry
@@ -111,18 +110,18 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
   ret void
 
 for.body:                                         ; preds = %for.body.preheader, %if.end
-  %c.05 = phi i8* [ %incdec.ptr, %if.end ], [ null, %for.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %c.05, i64 -1
-  %0 = load i8, i8* %incdec.ptr, align 1
+  %c.05 = phi ptr [ %incdec.ptr, %if.end ], [ null, %for.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %c.05, i64 -1
+  %0 = load i8, ptr %incdec.ptr, align 1
   %tobool.not = icmp eq i8 %0, 0
   br i1 %tobool.not, label %if.end, label %if.then
 
 if.then:                                          ; preds = %for.body
-  store i8 95, i8* %incdec.ptr, align 1
+  store i8 95, ptr %incdec.ptr, align 1
   br label %if.end
 
 if.end:                                           ; preds = %for.body, %if.then
-  %cmp.not = icmp eq i8* %incdec.ptr, %b
+  %cmp.not = icmp eq ptr %incdec.ptr, %b
   br i1 %cmp.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
@@ -130,7 +129,7 @@ if.end:                                           ; preds = %for.body, %if.then
 ;  1. As a uniform address for the load, and
 ;  2. Non-uniform use by the getelementptr which is stored. This requires the
 ;     vector value.
-define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias %start.2, i64 %N) {
+define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias %start.2, i64 %N) {
 ; CHECK-LABEL: @pointer_induction_used_as_vector(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
@@ -138,48 +137,47 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8*, i8** [[START_1:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, i8* [[START_2:%.*]], i64 [[N_VEC]]
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[N_VEC]], 8
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i8* [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8*, i8** [[START_1]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8*, i8** [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8** [[TMP3]] to <4 x i8*>*
-; CHECK-NEXT:    store <4 x i8*> [[TMP2]], <4 x i8*>* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i8*> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 8
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    store <4 x ptr> [[TMP4]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>*
-; CHECK-NEXT:    store <4 x i8> [[TMP8]], <4 x i8>* [[TMP9]], align 1
+; CHECK-NEXT:    store <4 x i8> [[TMP8]], ptr [[TMP7]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i64 4
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i8** [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i8* [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[START_2]], [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP_BODY:%.*]]
 ; CHECK:       loop.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    [[PTR_IV_1:%.*]] = phi i8** [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi i8* [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
-; CHECK-NEXT:    [[PTR_IV_1_NEXT]] = getelementptr inbounds i8*, i8** [[PTR_IV_1]], i64 1
-; CHECK-NEXT:    [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, i8* [[PTR_IV_2]], i64 1
-; CHECK-NEXT:    store i8* [[PTR_IV_2_NEXT]], i8** [[PTR_IV_1]], align 8
-; CHECK-NEXT:    [[LV:%.*]] = load i8, i8* [[PTR_IV_2]], align 1
+; CHECK-NEXT:    [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_1_NEXT:%.*]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    [[PTR_IV_1_NEXT]] = getelementptr inbounds ptr, ptr [[PTR_IV_1]], i64 1
+; CHECK-NEXT:    [[PTR_IV_2_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV_2]], i64 1
+; CHECK-NEXT:    store ptr [[PTR_IV_2_NEXT]], ptr [[PTR_IV_1]], align 8
+; CHECK-NEXT:    [[LV:%.*]] = load i8, ptr [[PTR_IV_2]], align 1
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[LV]], 1
-; CHECK-NEXT:    store i8 [[ADD]], i8* [[PTR_IV_2]], align 1
+; CHECK-NEXT:    store i8 [[ADD]], ptr [[PTR_IV_2]], align 1
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 1
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[C]], label [[LOOP_BODY]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -192,14 +190,14 @@ entry:
 
 loop.body:                                    ; preds = %loop.body, %entry
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.body ]
-  %ptr.iv.1 = phi i8** [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
-  %ptr.iv.2 = phi i8* [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
-  %ptr.iv.1.next = getelementptr inbounds i8*, i8** %ptr.iv.1, i64 1
-  %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1
-  store i8* %ptr.iv.2.next, i8** %ptr.iv.1, align 8
-  %lv = load i8, i8* %ptr.iv.2, align 1
+  %ptr.iv.1 = phi ptr [ %start.1, %entry ], [ %ptr.iv.1.next, %loop.body ]
+  %ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop.body ]
+  %ptr.iv.1.next = getelementptr inbounds ptr, ptr %ptr.iv.1, i64 1
+  %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
+  store ptr %ptr.iv.2.next, ptr %ptr.iv.1, align 8
+  %lv = load i8, ptr %ptr.iv.2, align 1
   %add = add i8 %lv, 1
-  store i8 %add, i8* %ptr.iv.2, align 1
+  store i8 %add, ptr %ptr.iv.2, align 1
   %iv.next = add nuw i64 %iv, 1
   %c = icmp ne i64 %iv.next, %N
   br i1 %c, label %loop.body, label %exit

diff  --git a/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
index 6a07af9bdf8f1..58ad64df1cbe1 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @b = common global i32 0, align 4
 @f = common global i32 0, align 4
 @a = common global i32 0, align 4
-@d = common global i32* null, align 8
-@e = common global i32* null, align 8
+@d = common global ptr null, align 8
+@e = common global ptr null, align 8
 @c = common global i32 0, align 4
 
 ; CHECK-LABEL: @fn1(
@@ -30,14 +30,14 @@ for.cond4.preheader:                              ; preds = %for.cond
   br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26
 
 for.cond7.preheader.lr.ph:                        ; preds = %for.cond4.preheader
-  %0 = load i32*, i32** @e, align 8, !tbaa !4
+  %0 = load ptr, ptr @e, align 8, !tbaa !4
   br label %for.cond7.preheader
 
 for.cond7.preheader:                              ; preds = %for.cond7.preheader.lr.ph, %for.inc23
   %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
   %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
   %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]
-  %1 = load i32, i32* @b, align 4, !tbaa !5
+  %1 = load i32, ptr @b, align 4, !tbaa !5
   %tobool11 = icmp eq i32 %1, 0
   br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph
 
@@ -50,9 +50,9 @@ for.body8:                                        ; preds = %for.body8.lr.ph, %f
   %i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ]
   %2 = trunc i64 %indvars.iv19 to i32
   %add10 = add i32 %add9, %2
-  store i32 %add10, i32* @f, align 4, !tbaa !5
+  store i32 %add10, ptr @f, align 4, !tbaa !5
   %idx.ext = sext i32 %add10 to i64
-  %add.ptr = getelementptr inbounds i32, i32* @a, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i32, ptr @a, i64 %idx.ext
   %tobool129 = icmp eq i32 %i.213, 0
   br i1 %tobool129, label %for.inc19, label %for.body13.lr.ph
 
@@ -63,10 +63,10 @@ for.body13.lr.ph:                                 ; preds = %for.body8
 for.body13:                                       ; preds = %for.body13.lr.ph, %for.body13
   %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]
   %add.ptr.sum = add i64 %idx.ext, %indvars.iv
-  %arrayidx = getelementptr inbounds i32, i32* @a, i64 %add.ptr.sum
-  %4 = load i32, i32* %arrayidx, align 4, !tbaa !5
-  %arrayidx15 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
-  store i32 %4, i32* %arrayidx15, align 4, !tbaa !5
+  %arrayidx = getelementptr inbounds i32, ptr @a, i64 %add.ptr.sum
+  %4 = load i32, ptr %arrayidx, align 4, !tbaa !5
+  %arrayidx15 = getelementptr inbounds i32, ptr %0, i64 %indvars.iv
+  store i32 %4, ptr %arrayidx15, align 4, !tbaa !5
   %indvars.iv.next = add i64 %indvars.iv, 1
   %5 = trunc i64 %indvars.iv.next to i32
   %tobool12 = icmp eq i32 %5, 0
@@ -76,17 +76,17 @@ for.cond11.for.inc19_crit_edge:                   ; preds = %for.body13
   br label %for.inc19
 
 for.inc19:                                        ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
-  %6 = load i32, i32* @c, align 4, !tbaa !5
+  %6 = load i32, ptr @c, align 4, !tbaa !5
   %inc20 = add nsw i32 %6, 1
-  store i32 %inc20, i32* @c, align 4, !tbaa !5
+  store i32 %inc20, ptr @c, align 4, !tbaa !5
   %indvars.iv.next20 = add i64 %indvars.iv19, 1
-  %7 = load i32, i32* @b, align 4, !tbaa !5
+  %7 = load i32, ptr @b, align 4, !tbaa !5
   %tobool = icmp eq i32 %7, 0
   br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8
 
 for.cond7.for.inc23_crit_edge:                    ; preds = %for.inc19
-  %add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ]
-  store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !4
+  %add.ptr.lcssa = phi ptr [ %add.ptr, %for.inc19 ]
+  store ptr %add.ptr.lcssa, ptr @d, align 8, !tbaa !4
   br label %for.inc23
 
 for.inc23:                                        ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader

diff  --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index d85a96470823d..8b0aaf2284878 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -15,29 +15,51 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16
-; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <2 x i16>*
-; CHECK-NEXT:    store <2 x i16> [[PREDPHI]], <2 x i16>* [[TMP11]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP6:%.*]] = xor <2 x i1> [[TMP5]], <i1 true, i1 true>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
+; CHECK-NEXT:    store <2 x i16> [[PREDPHI]], ptr [[TMP8]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT:    br label [[LOOP_COND:%.*]]
+; CHECK:       loop.cond:
+; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
+; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
+; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.next:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
+; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
+; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -49,8 +71,8 @@ loop.header:
 
 loop.cond:
   %blend = phi i16 [ %iv.trunc, %loop.header ]
-  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
-  %lv = load i16, i16* %src.ptr, align 1
+  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
+  %lv = load i16, ptr %src.ptr, align 1
   %cmp.b = icmp sgt i64 %iv, %a
   br i1 %cmp.b, label %loop.next, label %loop.latch
 
@@ -59,8 +81,8 @@ loop.next:
 
 loop.latch:
   %res = phi i16 [ %lv, %loop.cond ], [ 1, %loop.next ]
-  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
-  store i16 %res, i16* %dst.ptr
+  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
+  store i16 %res, ptr %dst.ptr
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp439 = icmp ult i64 %iv, 31
   br i1 %cmp439, label %loop.header, label %exit
@@ -80,34 +102,57 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16
-; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, i16* [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP7]], align 1
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP8]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP12]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[TMP13]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i16* [[TMP14]] to <2 x i16>*
-; CHECK-NEXT:    store <2 x i16> [[PREDPHI1]], <2 x i16>* [[TMP15]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [32 x i16], ptr @src, i16 0, i16 [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP10]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0
+; CHECK-NEXT:    store <2 x i16> [[PREDPHI1]], ptr [[TMP12]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.cond:
+; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
+; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
+; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.next:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
+; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
+; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -120,8 +165,8 @@ loop.header:
 
 loop.cond:
   %blend = phi i16 [ %iv.trunc, %loop.header ]
-  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
-  %lv = load i16, i16* %src.ptr, align 1
+  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
+  %lv = load i16, ptr %src.ptr, align 1
   %cmp.b = icmp sgt i64 %iv, %a
   br i1 %cmp.b, label %loop.next, label %loop.latch
 
@@ -130,8 +175,8 @@ loop.next:
 
 loop.latch:
   %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
-  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
-  store i16 %res, i16* %dst.ptr
+  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
+  store i16 %res, ptr %dst.ptr
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp439 = icmp ult i64 %iv, 31
   br i1 %cmp439, label %loop.header, label %exit
@@ -140,7 +185,7 @@ exit:
   ret void
 }
 
-define void @multiple_incoming_phi_with_blend_mask(i64 %a, i16* noalias %dst) {
+define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
 ; CHECK-LABEL: @multiple_incoming_phi_with_blend_mask(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -154,30 +199,51 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, i16* noalias %dst) {
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 1
-; CHECK-NEXT:    [[INS1:%.+]] = insertelement <2 x i16> poison, i16 [[TMP8]], i32 0
-; CHECK-NEXT:    [[INS2:%.+]] = insertelement <2 x i16> [[INS1]], i16 [[TMP9]], i32 1
-; CHECK-NEXT:    [[DST0:%.+]] = getelementptr inbounds i16, i16* %dst, i64 [[TMP0]]
-; CHECK-NEXT:    [[DST1:%.+]] = getelementptr inbounds i16, i16* [[DST0]], i32 0
-; CHECK-NEXT:    [[DST1_BC:%.+]] = bitcast i16* [[DST1]] to <2 x i16>*
-; CHECK-NEXT:    store <2 x i16> [[INS2]], <2 x i16>* [[DST1_BC]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], <i1 true, i1 true>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i16> [[TMP9]], i16 [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0
+; CHECK-NEXT:    store <2 x i16> [[TMP10]], ptr [[TMP12]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
 ; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], <i16 2, i16 2>
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT:    [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.next:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ]
+; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
+; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT:    store i16 [[LV]], ptr [[DST_PTR]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
+; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -194,10 +260,10 @@ loop.next:
 
 loop.latch:
   %blend = phi i16 [ %iv.trunc, %loop.header ], [ %iv.trunc.2, %loop.next ]
-  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
-  %lv = load i16, i16* %src.ptr, align 1
-  %dst.ptr = getelementptr inbounds i16, i16* %dst, i64 %iv
-  store i16 %lv, i16* %dst.ptr
+  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
+  %lv = load i16, ptr %src.ptr, align 1
+  %dst.ptr = getelementptr inbounds i16, ptr %dst, i64 %iv
+  store i16 %lv, ptr %dst.ptr
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp439 = icmp ult i64 %iv, 31
   br i1 %cmp439, label %loop.header, label %exit
@@ -217,17 +283,17 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i16
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
@@ -236,8 +302,8 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.if1:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
 ; CHECK:       pred.load.continue2:
@@ -249,17 +315,41 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP18]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP19]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, i16* [[TMP20]], i32 0
-; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i16* [[TMP21]] to <2 x i16>*
-; CHECK-NEXT:    store <2 x i16> [[PREDPHI3]], <2 x i16>* [[TMP22]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP20]], i32 0
+; CHECK-NEXT:    store <2 x i16> [[PREDPHI3]], ptr [[TMP21]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, 64
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.cond:
+; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
+; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
+; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.next:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
+; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 63
+; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -272,8 +362,8 @@ loop.header:
 
 loop.cond:
   %blend = phi i16 [ %iv.trunc, %loop.header ]
-  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
-  %lv = load i16, i16* %src.ptr, align 1
+  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
+  %lv = load i16, ptr %src.ptr, align 1
   %cmp.b = icmp sgt i64 %iv, %a
   br i1 %cmp.b, label %loop.next, label %loop.latch
 
@@ -282,8 +372,8 @@ loop.next:
 
 loop.latch:
   %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
-  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
-  store i16 %res, i16* %dst.ptr
+  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
+  store i16 %res, ptr %dst.ptr
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp439 = icmp ult i64 %iv, 63
   br i1 %cmp439, label %loop.header, label %exit
@@ -293,7 +383,7 @@ exit:
 }
 
 ; Test case for PR44800.
-define void @duplicated_incoming_blocks_blend(i32 %x, i32* %ptr) {
+define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) {
 ; CHECK-LABEL: @duplicated_incoming_blocks_blend(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -303,17 +393,32 @@ define void @duplicated_incoming_blocks_blend(i32 %x, i32* %ptr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 1000, 1000
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ]
+; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[P]]
+; CHECK-NEXT:    store i32 [[P]], ptr [[GEP_PTR]], align 4
+; CHECK-NEXT:    [[ADD_I]] = add nsw i32 [[P]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -325,8 +430,8 @@ loop.header:
 
 loop.latch:
   %p = phi i32 [ %iv, %loop.header ], [ %iv, %loop.header ]
-  %gep.ptr = getelementptr i32, i32* %ptr, i32 %p
-  store i32 %p, i32* %gep.ptr
+  %gep.ptr = getelementptr i32, ptr %ptr, i32 %p
+  store i32 %p, ptr %gep.ptr
   %add.i = add nsw i32 %p, 1
   %cmp = icmp slt i32 %add.i, 1000
   br i1 %cmp, label %loop.header, label %exit

diff  --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
index 7ba39213afcd1..11d0aac742979 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
@@ -9,7 +9,7 @@
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-define dso_local void @foo(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) {
+define dso_local void @foo(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
 entry:
   %cmp6 = icmp eq i32 %N, 0
   br i1 %cmp6, label %while.end, label %while.body.preheader
@@ -19,16 +19,16 @@ while.body.preheader:
 
 while.body:
   %N.addr.010 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
-  %C.addr.09 = phi i8* [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ]
-  %B.addr.08 = phi i8* [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ]
-  %A.addr.07 = phi i8* [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %A.addr.07, i32 1
-  %0 = load i8, i8* %A.addr.07, align 1
-  %incdec.ptr1 = getelementptr inbounds i8, i8* %B.addr.08, i32 1
-  %1 = load i8, i8* %B.addr.08, align 1
+  %C.addr.09 = phi ptr [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ]
+  %B.addr.08 = phi ptr [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ]
+  %A.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %A.addr.07, i32 1
+  %0 = load i8, ptr %A.addr.07, align 1
+  %incdec.ptr1 = getelementptr inbounds i8, ptr %B.addr.08, i32 1
+  %1 = load i8, ptr %B.addr.08, align 1
   %add = add i8 %1, %0
-  %incdec.ptr4 = getelementptr inbounds i8, i8* %C.addr.09, i32 1
-  store i8 %add, i8* %C.addr.09, align 1
+  %incdec.ptr4 = getelementptr inbounds i8, ptr %C.addr.09, i32 1
+  store i8 %add, ptr %C.addr.09, align 1
   %dec = add i32 %N.addr.010, -1
   %cmp = icmp eq i32 %dec, 0
   br i1 %cmp, label %while.end.loopexit, label %while.body
@@ -43,7 +43,7 @@ while.end:
 ; Make sure a loop is successfully vectorized with fold-tail when the backedge
 ; taken count is constant and used inside the loop. Issue revealed by D76992.
 ;
-define void @reuse_const_btc(i8* %A) optsize {
+define void @reuse_const_btc(ptr %A) optsize {
 ; CHECK-LABEL: @reuse_const_btc
 ; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, <i32 13, i32 13, i32 13, i32 13>
 ; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> <i32 12, i32 12, i32 12, i32 12>, <4 x i32> <i32 13, i32 13, i32 13, i32 13>
@@ -54,7 +54,7 @@ entry:
 loop:
   %riv = phi i32 [ 13, %entry ], [ %rivMinus1, %merge ]
   %sub = sub nuw nsw i32 20, %riv
-  %arrayidx = getelementptr inbounds i8, i8* %A, i32 %sub
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %sub
   %cond0 = icmp eq i32 %riv, 7
   br i1 %cond0, label %then, label %else
 then:
@@ -64,7 +64,7 @@ else:
 merge:
   %blend = phi i32 [ 13, %then ], [ 12, %else ]
   %trunc = trunc i32 %blend to i8
-  store i8 %trunc, i8* %arrayidx, align 1
+  store i8 %trunc, ptr %arrayidx, align 1
   %rivMinus1 = add nuw nsw i32 %riv, -1
   %cond = icmp eq i32 %riv, 0
   br i1 %cond, label %exit, label %loop

diff  --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index f9345f825454c..c07512644f721 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -9,53 +9,53 @@
 ; CHECK-REMARKS-NEXT: remark: {{.*}} interleaved loop (interleaved count: 4)
 ; CHECK-REMARKS-NOT:  remark: {{.*}} vectorized loop
 
-define void @VF1-VPlanExe(i32* %dst) {
+define void @VF1-VPlanExe(ptr %dst) {
 ; CHECK-LABEL: @VF1-VPlanExe(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 14
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 14
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 14
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 14
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 14
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDUCTION]]
-; CHECK-NEXT:    store i32 0, i32* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
-; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
 ; CHECK:       pred.store.if4:
-; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION1]]
-; CHECK-NEXT:    store i32 0, i32* [[TMP5]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE5]]
 ; CHECK:       pred.store.continue5:
-; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
 ; CHECK:       pred.store.if6:
-; CHECK-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION2]]
-; CHECK-NEXT:    store i32 0, i32* [[TMP6]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; CHECK:       pred.store.continue7:
-; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
 ; CHECK:       pred.store.if8:
-; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION3]]
-; CHECK-NEXT:    store i32 0, i32* [[TMP7]], align 4
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE12]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP10]]
+; CHECK-NEXT:    store i32 0, ptr [[TMP11]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; CHECK:       pred.store.continue9:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -65,8 +65,8 @@ define void @VF1-VPlanExe(i32* %dst) {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 0, i32* [[DST_PTR]], align 4
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 0, ptr [[DST_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -79,20 +79,20 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %dst.ptr = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
-  store i32 0, i32* %dst.ptr
+  %dst.ptr = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
+  store i32 0, ptr %dst.ptr
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 15
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
+define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) {
 ; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds double, double* [[PTR1:%.*]], i64 15
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds double, ptr [[PTR1:%.*]], i64 15
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[PTR1]], i64 16
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR1]], i64 128
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
@@ -107,59 +107,65 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP4]]
-; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[NEXT_GEP]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK:       pred.store.continue:
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
 ; CHECK:       pred.store.if7:
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP5]]
-; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP1]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 8
+; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[NEXT_GEP1]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; CHECK:       pred.store.continue8:
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
 ; CHECK:       pred.store.if9:
-; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP6]]
-; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP2]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 8
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP9]]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[NEXT_GEP2]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
 ; CHECK:       pred.store.continue10:
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
 ; CHECK:       pred.store.if11:
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP7]]
-; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP3]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 8
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP11]]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE12]]
 ; CHECK:       pred.store.continue12:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[ADDR:%.*]] = phi double* [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    store double 0.000000e+00, double* [[ADDR]], align 8
-; CHECK-NEXT:    [[PTR]] = getelementptr inbounds double, double* [[ADDR]], i64 1
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq double* [[PTR]], [[PTR2]]
+; CHECK-NEXT:    [[ADDR:%.*]] = phi ptr [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    store double 0.000000e+00, ptr [[ADDR]], align 8
+; CHECK-NEXT:    [[PTR]] = getelementptr inbounds double, ptr [[ADDR]], i64 1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[PTR]], [[PTR2]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ;
 entry:
-  %ptr2 = getelementptr inbounds double, double* %ptr1, i64 15
+  %ptr2 = getelementptr inbounds double, ptr %ptr1, i64 15
   br label %for.body
 
 for.cond.cleanup:
   ret void
 
 for.body:
-  %addr = phi double* [ %ptr, %for.body ], [ %ptr1, %entry ]
-  store double 0.0, double* %addr
-  %ptr = getelementptr inbounds double, double* %addr, i64 1
-  %cond = icmp eq double* %ptr, %ptr2
+  %addr = phi ptr [ %ptr, %for.body ], [ %ptr1, %entry ]
+  store double 0.0, ptr %addr
+  %ptr = getelementptr inbounds double, ptr %addr, i64 1
+  %cond = icmp eq ptr %ptr, %ptr2
   br i1 %cond, label %for.cond.cleanup, label %for.body
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-REMARKS: {{.*}}

diff  --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
index 1c70cc9f8d224..a11db7ea3ae1e 100644
--- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
+++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
@@ -12,135 +12,131 @@
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 
-define void @basic_loop(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
+define void @basic_loop(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
 ; CHECK-LABEL: @basic_loop(
 ; CHECK-NEXT:  header:
-; CHECK-NEXT:    [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
+; CHECK-NEXT:    [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
-; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
 ; CHECK-NEXT:    [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
+; CHECK-NEXT:    [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[DEC66]], -1
-; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    store i8 [[TMP7]], i8* [[BUFF]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    store i8 [[TMP5]], ptr [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4
 ; CHECK-NEXT:    ret void
 ;
 header:
-  %ptr0 = load i8*, i8** %pos, align 4
+  %ptr0 = load ptr, ptr %pos, align 4
   br label %body
 
 body:
   %dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
-  %buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
+  %buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1
   %dec = add nsw i32 %dec66, -1
-  %0 = load i8, i8* %incdec.ptr, align 1
-  store i8 %0, i8* %buff, align 1
+  %0 = load i8, ptr %incdec.ptr, align 1
+  store i8 %0, ptr %buff, align 1
   %tobool11 = icmp eq i32 %dec, 0
   br i1 %tobool11, label %end, label %body
 
 end:
-  store i8* %incdec.ptr, i8** %pos, align 4
+  store ptr %incdec.ptr, ptr %pos, align 4
   ret void
 }
 
-define void @metadata(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
+define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
 ; CHECK-LABEL: @metadata(
 ; CHECK-NEXT:  header:
-; CHECK-NEXT:    [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
+; CHECK-NEXT:    [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
-; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
-; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    store <4 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
 ; CHECK-NEXT:    [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
+; CHECK-NEXT:    [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[DEC66]], -1
-; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
-; CHECK-NEXT:    store i8 [[TMP7]], i8* [[BUFF]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
+; CHECK-NEXT:    store i8 [[TMP5]], ptr [[BUFF]], align 1
 ; CHECK-NEXT:    [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
+; CHECK-NEXT:    [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4
 ; CHECK-NEXT:    ret void
 ;
 header:
-  %ptr0 = load i8*, i8** %pos, align 4
+  %ptr0 = load ptr, ptr %pos, align 4
   br label %body
 
 body:
   %dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
-  %buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
-  %incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
+  %buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1
   %dec = add nsw i32 %dec66, -1
-  %0 = load i8, i8* %incdec.ptr, align 1
-  store i8 %0, i8* %buff, align 1
+  %0 = load i8, ptr %incdec.ptr, align 1
+  store i8 %0, ptr %buff, align 1
   %tobool11 = icmp eq i32 %dec, 0
   br i1 %tobool11, label %end, label %body, !llvm.loop !1
 
 end:
-  store i8* %incdec.ptr, i8** %pos, align 4
+  store ptr %incdec.ptr, ptr %pos, align 4
   ret void
 }