[llvm] LAA: strip unit-stride check in getPtrStride (PR #122879)

Tue Jan 14 01:58:43 PST 2025

https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/122879

Strip an unnecessary unit-stride check in getPtrStride, when checking whether null pointer dereference is undefined. This patch is similar in spirit to a353e25 (Don't require Stride == 1/-1 for inbounds pointer AddRecs nowrap.), and has the effect of removing redundant SCEV checks.

-- 8< --
Requires rebase after #122876 is landed.

>From f9f1af11e76355c43afba50ca2b296d48fd9d820 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 14 Jan 2025 09:08:33 +0000
Subject: [PATCH] LAA: strip unit-stride check in getPtrStride

Strip an unnecessary unit-stride check in getPtrStride, when checking
whether null pointer dereference is undefined. This patch is similar in
spirit to a353e25 (Don't require Stride == 1/-1 for inbounds pointer
AddRecs nowrap.), and has the effect of removing redundant SCEV checks.
---
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |   9 +-
 .../LoopAccessAnalysis/nusw-predicates.ll     |   3 -
 .../wrapping-pointer-versioning.ll            |   4 -
 .../AArch64/conditional-branches-cost.ll      | 128 +++-------
 .../LoopVectorize/X86/interleave-cost.ll      | 232 ++----------------
 .../LoopVersioning/incorrect-phi.ll           |  48 +++-
 .../wrapping-pointer-versioning.ll            |  63 +----
 7 files changed, 117 insertions(+), 370 deletions(-)

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2a68979add666d..4122afcc1db3c8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1520,12 +1520,11 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
       GEP && GEP->hasNoUnsignedSignedWrap())
     return Stride;
 
-  // If the null pointer is undefined, then a access sequence which would
-  // otherwise access it can be assumed not to unsigned wrap.  Note that this
-  // assumes the object in memory is aligned to the natural alignment.
+  // If the null pointer dereference is undefined, then a access sequence which
+  // would otherwise access it can be assumed not to unsigned wrap. Note that
+  // this assumes the object in memory is aligned to the natural alignment.
   unsigned AddrSpace = Ty->getPointerAddressSpace();
-  if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
-      (Stride == 1 || Stride == -1))
+  if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace))
     return Stride;
 
   if (Assume) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
index 5234d8f107271a..f2c1fb3e22b071 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
@@ -19,7 +19,6 @@ define void @int_and_pointer_predicate(ptr %v, i32 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
-; CHECK-NEXT:      {%v,+,4}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ;
@@ -93,8 +92,6 @@ define void @int_and_multiple_pointer_predicates(ptr %v, ptr %w, i32 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
-; CHECK-NEXT:      {%v,+,4}<%loop> Added Flags: <nusw>
-; CHECK-NEXT:      {%w,+,4}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ;
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index 71c20bc2b2a824..457c85d2d7f916 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -39,7 +39,6 @@ define void @f1(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {0,+,2}<%for.body> Added Flags: <nusw>
-; CHECK-NEXT:      {%a,+,4}<%for.body> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
@@ -115,7 +114,6 @@ define void @f2(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
-; CHECK-NEXT:      {((4 * (zext i31 (trunc i64 %N to i31) to i64))<nuw><nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
@@ -176,7 +174,6 @@ define void @f3(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {0,+,2}<%for.body> Added Flags: <nssw>
-; CHECK-NEXT:      {%a,+,4}<%for.body> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
@@ -233,7 +230,6 @@ define void @f4(ptr noalias %a, ptr noalias %b, i64 %N) {
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
 ; CHECK-NEXT:      {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
-; CHECK-NEXT:      {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidxA = getelementptr i16, ptr %a, i64 %mul_ext:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index caa98d766a8c34..e6f1099f12b39b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1120,35 +1120,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[SRC_3:%.*]], ptr noalias [[SRC_4:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
-; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
-; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; DEFAULT:       vector.scevcheck:
-; DEFAULT-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
-; DEFAULT-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; DEFAULT-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; DEFAULT-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
-; DEFAULT-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
-; DEFAULT-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; DEFAULT-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; DEFAULT-NEXT:    [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
-; DEFAULT-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
-; DEFAULT-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; DEFAULT-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
-; DEFAULT-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
-; DEFAULT-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
-; DEFAULT-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
-; DEFAULT-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; DEFAULT-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
-; DEFAULT-NEXT:    br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; DEFAULT-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP0]], 8
+; DEFAULT-NEXT:    br i1 [[TMP14]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; DEFAULT:       vector.ph:
 ; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
 ; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
@@ -1190,7 +1163,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT:       pred.store.continue:
 ; DEFAULT-NEXT:    [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
 ; DEFAULT-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
-; DEFAULT:       pred.store.if14:
+; DEFAULT:       pred.store.if7:
 ; DEFAULT-NEXT:    [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP32]], align 4
 ; DEFAULT-NEXT:    [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
@@ -1202,10 +1175,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP37]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE15]]
-; DEFAULT:       pred.store.continue15:
+; DEFAULT:       pred.store.continue8:
 ; DEFAULT-NEXT:    [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
 ; DEFAULT-NEXT:    br i1 [[TMP38]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
-; DEFAULT:       pred.store.if16:
+; DEFAULT:       pred.store.if9:
 ; DEFAULT-NEXT:    [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP39]], align 4
 ; DEFAULT-NEXT:    [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
@@ -1217,10 +1190,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP44]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE17]]
-; DEFAULT:       pred.store.continue17:
+; DEFAULT:       pred.store.continue10:
 ; DEFAULT-NEXT:    [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
 ; DEFAULT-NEXT:    br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
-; DEFAULT:       pred.store.if18:
+; DEFAULT:       pred.store.if11:
 ; DEFAULT-NEXT:    [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP46]], align 4
 ; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
@@ -1232,10 +1205,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP51]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE19]]
-; DEFAULT:       pred.store.continue19:
+; DEFAULT:       pred.store.continue12:
 ; DEFAULT-NEXT:    [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
 ; DEFAULT-NEXT:    br i1 [[TMP52]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
-; DEFAULT:       pred.store.if20:
+; DEFAULT:       pred.store.if13:
 ; DEFAULT-NEXT:    [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP53]], align 4
 ; DEFAULT-NEXT:    [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
@@ -1247,10 +1220,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP58]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE21]]
-; DEFAULT:       pred.store.continue21:
+; DEFAULT:       pred.store.continue14:
 ; DEFAULT-NEXT:    [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
 ; DEFAULT-NEXT:    br i1 [[TMP59]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
-; DEFAULT:       pred.store.if22:
+; DEFAULT:       pred.store.if15:
 ; DEFAULT-NEXT:    [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP60]], align 4
 ; DEFAULT-NEXT:    [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
@@ -1262,10 +1235,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP65]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE23]]
-; DEFAULT:       pred.store.continue23:
+; DEFAULT:       pred.store.continue16:
 ; DEFAULT-NEXT:    [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
 ; DEFAULT-NEXT:    br i1 [[TMP66]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
-; DEFAULT:       pred.store.if24:
+; DEFAULT:       pred.store.if17:
 ; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP67]], align 4
 ; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
@@ -1277,10 +1250,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP72]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE25]]
-; DEFAULT:       pred.store.continue25:
+; DEFAULT:       pred.store.continue18:
 ; DEFAULT-NEXT:    [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
 ; DEFAULT-NEXT:    br i1 [[TMP73]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]]
-; DEFAULT:       pred.store.if26:
+; DEFAULT:       pred.store.if19:
 ; DEFAULT-NEXT:    [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP74]], align 4
 ; DEFAULT-NEXT:    [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
@@ -1292,7 +1265,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
 ; DEFAULT-NEXT:    store float 0.000000e+00, ptr [[TMP79]], align 4
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE27]]
-; DEFAULT:       pred.store.continue27:
+; DEFAULT:       pred.store.continue20:
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; DEFAULT-NEXT:    [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1301,7 +1274,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; DEFAULT-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; DEFAULT:       scalar.ph:
-; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; DEFAULT-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; DEFAULT:       loop.header:
 ; DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
@@ -1336,33 +1309,6 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:  entry:
 ; PRED-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
 ; PRED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; PRED:       vector.scevcheck:
-; PRED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
-; PRED-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; PRED-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; PRED-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
-; PRED-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; PRED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
-; PRED-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; PRED-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; PRED-NEXT:    [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; PRED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; PRED-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
-; PRED-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
-; PRED-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
-; PRED-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
-; PRED-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
-; PRED-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
-; PRED-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
-; PRED-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; PRED-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
-; PRED-NEXT:    br i1 [[TMP14]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; PRED:       vector.ph:
 ; PRED-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7
 ; PRED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
@@ -1373,9 +1319,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 [[TMP0]])
 ; PRED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; PRED:       vector.body:
-; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ]
-; PRED-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE27]] ]
-; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE27]] ]
+; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE27:%.*]] ]
+; PRED-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_SCEVCHECK]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE27]] ]
+; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_SCEVCHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE27]] ]
 ; PRED-NEXT:    [[TMP18:%.*]] = load float, ptr [[SRC_1]], align 4
 ; PRED-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
 ; PRED-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -1411,7 +1357,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       pred.store.continue:
 ; PRED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP26]], i32 1
 ; PRED-NEXT:    br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
-; PRED:       pred.store.if14:
+; PRED:       pred.store.if7:
 ; PRED-NEXT:    [[TMP36:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP36]], align 4
 ; PRED-NEXT:    [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
@@ -1423,10 +1369,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP41:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 1
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP41]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE15]]
-; PRED:       pred.store.continue15:
+; PRED:       pred.store.continue8:
 ; PRED-NEXT:    [[TMP42:%.*]] = extractelement <8 x i1> [[TMP26]], i32 2
 ; PRED-NEXT:    br i1 [[TMP42]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
-; PRED:       pred.store.if16:
+; PRED:       pred.store.if9:
 ; PRED-NEXT:    [[TMP43:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP43]], align 4
 ; PRED-NEXT:    [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
@@ -1438,10 +1384,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP48:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 2
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP48]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE17]]
-; PRED:       pred.store.continue17:
+; PRED:       pred.store.continue10:
 ; PRED-NEXT:    [[TMP49:%.*]] = extractelement <8 x i1> [[TMP26]], i32 3
 ; PRED-NEXT:    br i1 [[TMP49]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
-; PRED:       pred.store.if18:
+; PRED:       pred.store.if11:
 ; PRED-NEXT:    [[TMP50:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP50]], align 4
 ; PRED-NEXT:    [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
@@ -1453,10 +1399,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP55:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 3
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP55]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE19]]
-; PRED:       pred.store.continue19:
+; PRED:       pred.store.continue12:
 ; PRED-NEXT:    [[TMP56:%.*]] = extractelement <8 x i1> [[TMP26]], i32 4
 ; PRED-NEXT:    br i1 [[TMP56]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
-; PRED:       pred.store.if20:
+; PRED:       pred.store.if13:
 ; PRED-NEXT:    [[TMP57:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP57]], align 4
 ; PRED-NEXT:    [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
@@ -1468,10 +1414,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP62:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 4
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP62]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE21]]
-; PRED:       pred.store.continue21:
+; PRED:       pred.store.continue14:
 ; PRED-NEXT:    [[TMP63:%.*]] = extractelement <8 x i1> [[TMP26]], i32 5
 ; PRED-NEXT:    br i1 [[TMP63]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
-; PRED:       pred.store.if22:
+; PRED:       pred.store.if15:
 ; PRED-NEXT:    [[TMP64:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP64]], align 4
 ; PRED-NEXT:    [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
@@ -1483,10 +1429,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP69:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 5
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP69]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE23]]
-; PRED:       pred.store.continue23:
+; PRED:       pred.store.continue16:
 ; PRED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i1> [[TMP26]], i32 6
 ; PRED-NEXT:    br i1 [[TMP70]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
-; PRED:       pred.store.if24:
+; PRED:       pred.store.if17:
 ; PRED-NEXT:    [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP71]], align 4
 ; PRED-NEXT:    [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
@@ -1498,10 +1444,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP76:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 6
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP76]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE25]]
-; PRED:       pred.store.continue25:
+; PRED:       pred.store.continue18:
 ; PRED-NEXT:    [[TMP77:%.*]] = extractelement <8 x i1> [[TMP26]], i32 7
 ; PRED-NEXT:    br i1 [[TMP77]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27]]
-; PRED:       pred.store.if26:
+; PRED:       pred.store.if19:
 ; PRED-NEXT:    [[TMP78:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP78]], align 4
 ; PRED-NEXT:    [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
@@ -1513,7 +1459,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP83:%.*]] = extractelement <8 x ptr> [[TMP27]], i32 7
 ; PRED-NEXT:    store float 0.000000e+00, ptr [[TMP83]], align 4
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE27]]
-; PRED:       pred.store.continue27:
+; PRED:       pred.store.continue20:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]])
 ; PRED-NEXT:    [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
@@ -1523,7 +1469,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED:       middle.block:
 ; PRED-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; PRED:       scalar.ph:
-; PRED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
+; PRED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; PRED-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; PRED:       loop.header:
 ; PRED-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
@@ -1770,7 +1716,7 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
 ; DEFAULT: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
 ; DEFAULT: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
 ; DEFAULT: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
-; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]}
+; DEFAULT: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
 ; DEFAULT: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
 ; DEFAULT: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
 ;.
@@ -1798,7 +1744,7 @@ attributes #2 = { vscale_range(2,2) "target-cpu"="neoverse-512tvb" }
 ; PRED: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
 ; PRED: [[LOOP22]] = distinct !{[[LOOP22]], [[META2]], [[META1]]}
 ; PRED: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]}
-; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]]}
+; PRED: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]}
 ; PRED: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]], [[META2]]}
 ; PRED: [[LOOP26]] = distinct !{[[LOOP26]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index 5c9375eb1d17f4..907e95e1ef2932 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -8,78 +8,7 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali
 ; CHECK-LABEL: define void @test_free_instructions_feeding_geps_for_interleave_groups(
 ; CHECK-SAME: ptr noalias [[P_INVAR:%.*]], ptr noalias [[DST_1:%.*]], ptr noalias [[DST_2:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK:       [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST_1]], i64 8
-; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult ptr [[TMP1]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST_1]], i64 12
-; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult ptr [[TMP5]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[DST_1]], i64 4
-; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 0, [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult ptr [[TMP9]], [[SCEVGEP5]]
-; CHECK-NEXT:    [[TMP11:%.*]] = or i1 [[TMP10]], [[MUL_OVERFLOW8]]
-; CHECK-NEXT:    [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 0, [[MUL_RESULT10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[DST_1]], i64 [[MUL_RESULT10]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult ptr [[TMP13]], [[DST_1]]
-; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW11]]
-; CHECK-NEXT:    [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[DST_2]], i64 8
-; CHECK-NEXT:    [[MUL13:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT14:%.*]] = extractvalue { i64, i1 } [[MUL13]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW15:%.*]] = extractvalue { i64, i1 } [[MUL13]], 1
-; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT14]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[SCEVGEP12]], i64 [[MUL_RESULT14]]
-; CHECK-NEXT:    [[TMP18:%.*]] = icmp ult ptr [[TMP17]], [[SCEVGEP12]]
-; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW15]]
-; CHECK-NEXT:    [[SCEVGEP16:%.*]] = getelementptr i8, ptr [[DST_2]], i64 12
-; CHECK-NEXT:    [[MUL17:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT18:%.*]] = extractvalue { i64, i1 } [[MUL17]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW19:%.*]] = extractvalue { i64, i1 } [[MUL17]], 1
-; CHECK-NEXT:    [[TMP20:%.*]] = sub i64 0, [[MUL_RESULT18]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[SCEVGEP16]], i64 [[MUL_RESULT18]]
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp ult ptr [[TMP21]], [[SCEVGEP16]]
-; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP22]], [[MUL_OVERFLOW19]]
-; CHECK-NEXT:    [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[DST_2]], i64 4
-; CHECK-NEXT:    [[MUL21:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT22:%.*]] = extractvalue { i64, i1 } [[MUL21]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW23:%.*]] = extractvalue { i64, i1 } [[MUL21]], 1
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 0, [[MUL_RESULT22]]
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[SCEVGEP20]], i64 [[MUL_RESULT22]]
-; CHECK-NEXT:    [[TMP26:%.*]] = icmp ult ptr [[TMP25]], [[SCEVGEP20]]
-; CHECK-NEXT:    [[TMP27:%.*]] = or i1 [[TMP26]], [[MUL_OVERFLOW23]]
-; CHECK-NEXT:    [[MUL24:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 -1)
-; CHECK-NEXT:    [[MUL_RESULT25:%.*]] = extractvalue { i64, i1 } [[MUL24]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW26:%.*]] = extractvalue { i64, i1 } [[MUL24]], 1
-; CHECK-NEXT:    [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT25]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[DST_2]], i64 [[MUL_RESULT25]]
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp ult ptr [[TMP29]], [[DST_2]]
-; CHECK-NEXT:    [[TMP31:%.*]] = or i1 [[TMP30]], [[MUL_OVERFLOW26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = or i1 [[TMP3]], [[TMP7]]
-; CHECK-NEXT:    [[TMP33:%.*]] = or i1 [[TMP32]], [[TMP11]]
-; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP15]]
-; CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP19]]
-; CHECK-NEXT:    [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP23]]
-; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP27]]
-; CHECK-NEXT:    [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP31]]
-; CHECK-NEXT:    br i1 [[TMP38]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
@@ -111,7 +40,7 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -185,44 +114,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG1]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK:       [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
-; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12
-; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
-; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4
-; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
-; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]]
-; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]]
-; CHECK-NEXT:    [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
-; CHECK-NEXT:    [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
-; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]]
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
-; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[ARG1]], 4
 ; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 16
@@ -274,7 +167,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -373,81 +266,8 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
-; CHECK:       [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[N]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28
-; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP7:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20
-; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult ptr [[TMP12]], [[SCEVGEP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW8]]
-; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; CHECK-NEXT:    [[MUL10:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT11:%.*]] = extractvalue { i64, i1 } [[MUL10]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW12:%.*]] = extractvalue { i64, i1 } [[MUL10]], 1
-; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
-; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT:    [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
-; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 0, [[MUL_RESULT15]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
-; CHECK-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
-; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8
-; CHECK-NEXT:    [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
-; CHECK-NEXT:    [[TMP23:%.*]] = sub i64 0, [[MUL_RESULT19]]
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
-; CHECK-NEXT:    [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
-; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT:    [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
-; CHECK-NEXT:    [[TMP27:%.*]] = sub i64 0, [[MUL_RESULT23]]
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
-; CHECK-NEXT:    [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
-; CHECK-NEXT:    [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
-; CHECK-NEXT:    [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
-; CHECK-NEXT:    [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
-; CHECK-NEXT:    [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]]
-; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
-; CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]]
-; CHECK-NEXT:    [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]]
-; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]]
-; CHECK-NEXT:    [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]]
-; CHECK-NEXT:    [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]]
-; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]]
-; CHECK-NEXT:    [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]]
-; CHECK-NEXT:    br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP43:%.*]] = shl i64 [[TMP42]], 5
@@ -475,7 +295,6 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 8, i64 16, i64 24>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP51:%.*]] = lshr exact i64 [[TMP50]], 1
@@ -484,8 +303,9 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-NEXT:    [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]]
-; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP50]]
+; CHECK-NEXT:    [[WIDE_VEC7:%.*]] = load <32 x i32>, ptr [[TMP15]], align 4
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = shufflevector <32 x i32> [[WIDE_VEC7]], <32 x i32> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
 ; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -495,13 +315,12 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
 ; CHECK-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ]
@@ -538,7 +357,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    store i32 0, ptr [[GEP_A_7]], align 4
 ; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -605,7 +424,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -619,7 +438,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -727,7 +546,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -741,7 +560,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -771,16 +590,13 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
 ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
-; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
-; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
-; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
-; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
-; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
-; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll
index e5ef8df9fc9cd9..eacd917a00a284 100644
--- a/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll
+++ b/llvm/test/Transforms/LoopVersioning/incorrect-phi.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes=loop-versioning -S < %s | FileCheck %s
 
 ; Make sure all PHIs are properly updated in the exit block.  Based on
@@ -8,8 +9,26 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @x = external global [2 x [3 x [5 x i16]]]
 
-; CHECK-LABEL: @phi_with_undef
 define void @phi_with_undef() {
+; CHECK-LABEL: define void @phi_with_undef() {
+; CHECK-NEXT:  [[BB6_LR_PH:.*]]:
+; CHECK-NEXT:    br label %[[BB6:.*]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[_TMP1423:%.*]] = phi i64 [ undef, %[[BB6_LR_PH]] ], [ [[_TMP142:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    [[_TMP123:%.*]] = getelementptr [2 x [3 x [5 x i16]]], ptr @x, i16 0, i64 undef
+; CHECK-NEXT:    [[_TMP126:%.*]] = getelementptr [3 x [5 x i16]], ptr [[_TMP123]], i16 0, i64 [[_TMP1423]]
+; CHECK-NEXT:    [[_TMP129:%.*]] = getelementptr [5 x i16], ptr [[_TMP126]], i16 0, i64 undef
+; CHECK-NEXT:    [[_TMP130:%.*]] = load i16, ptr [[_TMP129]], align 2
+; CHECK-NEXT:    store i16 undef, ptr @x, align 2
+; CHECK-NEXT:    [[_TMP142]] = add i64 [[_TMP1423]], 1
+; CHECK-NEXT:    br i1 false, label %[[BB6]], label %[[LOOP_EXIT:.*]]
+; CHECK:       [[LOOP_EXIT]]:
+; CHECK-NEXT:    [[_TMP142_LCSSA:%.*]] = phi i64 [ [[_TMP142]], %[[BB6]] ]
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi i16 [ undef, %[[BB6]] ]
+; CHECK-NEXT:    br label %[[BB9:.*]]
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    ret void
+;
 bb6.lr.ph:                                        ; preds = %bb5.preheader
   br label %bb6
 
@@ -26,17 +45,33 @@ bb6:                                              ; preds = %bb6.lr.ph, %bb6
 loop.exit:                                ; preds = %bb6
   %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]
   %split = phi i16 [ undef, %bb6 ]
-; CHECK: %split.ph = phi i16 [ undef, %bb6.lver.orig ]
-; CHECK: %split.ph3 = phi i16 [ undef, %bb6 ]
-; CHECK: %split = phi i16 [ %split.ph, %loop.exit.loopexit ], [ %split.ph3, %loop.exit.loopexit1 ]
   br label %bb9
 
 bb9:                                              ; preds = %bb9.loopexit, %bb1
   ret void
 }
 
-; CHECK-LABEL: @phi_with_non_loop_defined_value
 define void @phi_with_non_loop_defined_value() {
+; CHECK-LABEL: define void @phi_with_non_loop_defined_value() {
+; CHECK-NEXT:  [[BB6_LR_PH:.*]]:
+; CHECK-NEXT:    [[T:%.*]] = add i16 1, 1
+; CHECK-NEXT:    br label %[[BB6:.*]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[_TMP1423:%.*]] = phi i64 [ undef, %[[BB6_LR_PH]] ], [ [[_TMP142:%.*]], %[[BB6]] ]
+; CHECK-NEXT:    [[_TMP123:%.*]] = getelementptr [2 x [3 x [5 x i16]]], ptr @x, i16 0, i64 undef
+; CHECK-NEXT:    [[_TMP126:%.*]] = getelementptr [3 x [5 x i16]], ptr [[_TMP123]], i16 0, i64 [[_TMP1423]]
+; CHECK-NEXT:    [[_TMP129:%.*]] = getelementptr [5 x i16], ptr [[_TMP126]], i16 0, i64 undef
+; CHECK-NEXT:    [[_TMP130:%.*]] = load i16, ptr [[_TMP129]], align 2
+; CHECK-NEXT:    store i16 undef, ptr @x, align 2
+; CHECK-NEXT:    [[_TMP142]] = add i64 [[_TMP1423]], 1
+; CHECK-NEXT:    br i1 false, label %[[BB6]], label %[[LOOP_EXIT:.*]]
+; CHECK:       [[LOOP_EXIT]]:
+; CHECK-NEXT:    [[_TMP142_LCSSA:%.*]] = phi i64 [ [[_TMP142]], %[[BB6]] ]
+; CHECK-NEXT:    [[SPLIT:%.*]] = phi i16 [ [[T]], %[[BB6]] ]
+; CHECK-NEXT:    br label %[[BB9:.*]]
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    ret void
+;
 bb6.lr.ph:                                        ; preds = %bb5.preheader
   %t = add i16 1, 1
   br label %bb6
@@ -54,9 +89,6 @@ bb6:                                              ; preds = %bb6.lr.ph, %bb6
 loop.exit:                                ; preds = %bb6
   %_tmp142.lcssa = phi i64 [ %_tmp142, %bb6 ]
   %split = phi i16 [ %t, %bb6 ]
-; CHECK: %split.ph = phi i16 [ %t, %bb6.lver.orig ]
-; CHECK: %split.ph3 = phi i16 [ %t, %bb6 ]
-; CHECK: %split = phi i16 [ %split.ph, %loop.exit.loopexit ], [ %split.ph3, %loop.exit.loopexit1 ]
   br label %bb9
 
 bb9:                                              ; preds = %bb9.loopexit, %bb1
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
index 6e5eeaf9f12188..1d416c46da9c60 100644
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -30,15 +30,7 @@ define void @f1(ptr noalias %a,
 ; LV-NEXT:  for.body.lver.check:
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
-; LV-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
-; LV-NEXT:    [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
-; LV-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
-; LV-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
-; LV-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
-; LV-NEXT:    [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP6]]
-; LV-NEXT:    br i1 [[TMP7]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -46,7 +38,7 @@ define void @f1(ptr noalias %a,
 ; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
 ; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
 ; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
-; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
+; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
 ; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
 ; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
 ; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
@@ -75,7 +67,7 @@ define void @f1(ptr noalias %a,
 ; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit2:
+; LV:       for.end.loopexit1:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -149,19 +141,7 @@ define void @f2(ptr noalias %a,
 ; LV-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], [[TMP1]]
 ; LV-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; LV-NEXT:    [[TMP8:%.*]] = trunc i64 [[N]] to i31
-; LV-NEXT:    [[TMP9:%.*]] = zext i31 [[TMP8]] to i64
-; LV-NEXT:    [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
-; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP10]]
-; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; LV-NEXT:    [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP11]]
-; LV-NEXT:    [[TMP13:%.*]] = icmp ugt ptr [[TMP12]], [[SCEVGEP]]
-; LV-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[MUL_OVERFLOW4]]
-; LV-NEXT:    [[TMP15:%.*]] = or i1 [[TMP7]], [[TMP14]]
+; LV-NEXT:    [[TMP15:%.*]] = or i1 [[TMP5]], [[TMP6]]
 ; LV-NEXT:    br i1 [[TMP15]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -170,7 +150,7 @@ define void @f2(ptr noalias %a,
 ; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
 ; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
 ; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
-; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
+; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
 ; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
 ; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
 ; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
@@ -199,7 +179,7 @@ define void @f2(ptr noalias %a,
 ; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit5:
+; LV:       for.end.loopexit2:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -255,15 +235,7 @@ define void @f3(ptr noalias %a,
 ; LV-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[MUL_RESULT]], 0
 ; LV-NEXT:    [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
-; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; LV-NEXT:    [[TMP6:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP8:%.*]] = icmp ult ptr [[TMP7]], [[A]]
-; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW4]]
-; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP5]], [[TMP9]]
+; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP3]], [[TMP4]]
 ; LV-NEXT:    br i1 [[TMP10]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -272,7 +244,7 @@ define void @f3(ptr noalias %a,
 ; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
 ; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
 ; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
-; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
+; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
 ; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
 ; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
 ; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
@@ -301,7 +273,7 @@ define void @f3(ptr noalias %a,
 ; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit5:
+; LV:       for.end.loopexit2:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -351,18 +323,7 @@ define void @f4(ptr noalias %a,
 ; LV-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP1]]
 ; LV-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
-; LV-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; LV-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP1]] to i64
-; LV-NEXT:    [[TMP9:%.*]] = shl nsw i64 [[TMP8]], 1
-; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP9]]
-; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; LV-NEXT:    [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]]
-; LV-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP10]]
-; LV-NEXT:    [[TMP12:%.*]] = icmp ugt ptr [[TMP11]], [[SCEVGEP]]
-; LV-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW4]]
-; LV-NEXT:    [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]]
+; LV-NEXT:    [[TMP14:%.*]] = or i1 [[TMP5]], [[TMP6]]
 ; LV-NEXT:    br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
@@ -371,7 +332,7 @@ define void @f4(ptr noalias %a,
 ; LV-NEXT:    [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
 ; LV-NEXT:    [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
 ; LV-NEXT:    [[MUL_EXT_LVER_ORIG:%.*]] = sext i32 [[MUL_LVER_ORIG]] to i64
-; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
+; LV-NEXT:    [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr i16, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
 ; LV-NEXT:    [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2
 ; LV-NEXT:    [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]]
 ; LV-NEXT:    [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2
@@ -400,7 +361,7 @@ define void @f4(ptr noalias %a,
 ; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit5:
+; LV:       for.end.loopexit2:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void