[llvm] 4eb9838 - [VPlan] Generalize VPValue::isDefinedOutsideLoopRegions.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 07:34:17 PDT 2024
Author: Florian Hahn
Date: 2024-09-20T15:34:00+01:00
New Revision: 4eb98384099cbd1d901d8ef6a210af95d9c36adf
URL: https://github.com/llvm/llvm-project/commit/4eb98384099cbd1d901d8ef6a210af95d9c36adf
DIFF: https://github.com/llvm/llvm-project/commit/4eb98384099cbd1d901d8ef6a210af95d9c36adf.diff
LOG: [VPlan] Generalize VPValue::isDefinedOutsideLoopRegions.
Update isDefinedOutsideLoopRegions to check if a recipe is defined
outside any region. Split off already approved
https://github.com/llvm/llvm-project/pull/95842 now that this can be
tested separately after landing VPlan-based LICM
https://github.com/llvm/llvm-project/issues/107501
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanValue.h
llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 6a6ec363592c32..eb6f7e95fc223c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -576,8 +576,9 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
return SplitBlock;
}
-VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
- VPRegionBlock *P = getParent();
+/// Return the enclosing loop region for region \p P. The templated version is
+/// used to support both const and non-const block arguments.
+template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
if (P && P->isReplicator()) {
P = P->getParent();
assert(!cast<VPRegionBlock>(P)->isReplicator() &&
@@ -586,6 +587,14 @@ VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
return P;
}
+VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
+ return getEnclosingLoopRegionForRegion(getParent());
+}
+
+const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
+ return getEnclosingLoopRegionForRegion(getParent());
+}
+
static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
if (VPBB->empty()) {
assert(
@@ -1458,6 +1467,11 @@ void VPlanIngredient::print(raw_ostream &O) const {
#endif
+bool VPValue::isDefinedOutsideLoopRegions() const {
+ return !hasDefiningRecipe() ||
+ !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
+}
+
void VPValue::replaceAllUsesWith(VPValue *New) {
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 73d218cdc7ac27..116d731d1f02a6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3199,6 +3199,7 @@ class VPBasicBlock : public VPBlockBase {
VPBasicBlock *splitAt(iterator SplitAt);
VPRegionBlock *getEnclosingLoopRegion();
+ const VPRegionBlock *getEnclosingLoopRegion() const;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a47ce61e28c50b..a23a59aa2f11c2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -180,10 +180,8 @@ class VPValue {
return getUnderlyingValue();
}
- /// Returns true if the VPValue is defined outside any loop region, i.e. it
- /// is a live-in value.
- /// TODO: Also handle recipes defined in pre-header blocks.
- bool isDefinedOutsideLoopRegions() const { return !hasDefiningRecipe(); }
+ /// Returns true if the VPValue is defined outside any loop region.
+ bool isDefinedOutsideLoopRegions() const;
// Set \p Val as the underlying Value of this VPValue.
void setUnderlyingValue(Value *Val) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 6953d6c48694c2..dc5cb246039e1d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -58,30 +58,30 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967288
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX7]]
-; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX7]]
-; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16>
-; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16>
+; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16>
+; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7]] to <8 x i16>
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8>
; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX7]]
-; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
-; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD10]] to <8 x i16>
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6]]
+; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16>
; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]]
; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8>
; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1
-; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 8
-; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC5]]
+; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX6]], 8
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
-; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -165,15 +165,15 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP6]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP9:%.*]] = trunc nuw <16 x i16> [[TMP7]] to <16 x i8>
; CHECK-NEXT: [[TMP10:%.*]] = trunc nuw <16 x i16> [[TMP8]] to <16 x i8>
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
@@ -190,13 +190,13 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0
-; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
-; CHECK: vec.epilog.vector.body:
-; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8>
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
@@ -244,41 +244,47 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
+; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[C]], <16 x i16> [[TMP6]], <16 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP8]] to <16 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
-; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
+; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1
+; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> poison, i16 [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP12]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
-; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX3]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT: store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
+; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8
-; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
-; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
+; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 5d163a97e3be3d..631d04dbcb088b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -28,6 +28,12 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: [[TMP19:%.*]] = sdiv i64 [[M]], [[CONV6]]
+; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32
+; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP19]] to i32
+; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP18]], [[CONV61]]
+; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP19]], [[CONV61]]
+; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[X]], [[TMP20]]
+; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[X]], [[TMP21]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -37,16 +43,10 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 0
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 1
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
-; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32
-; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP19]] to i32
-; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP18]], [[CONV61]]
-; CHECK-NEXT: [[TMP23:%.*]] = mul i64 [[TMP19]], [[CONV61]]
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP12]], [[TMP22]]
; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP17]], [[TMP23]]
; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP24]] to i32
; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP25]] to i32
-; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[X]], [[TMP20]]
-; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[X]], [[TMP21]]
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP29]], [[TMP27]]
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index 4fd9604dd6d257..28830f9bcd11e1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -18,17 +18,17 @@
; Check that the extractvalue operands are actually free in vector code.
; FORCED: [[E1:%.+]] = extractvalue { i64, i64 } %sv, 0
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i64> poison, i64 [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { i64, i64 } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x i64> poison, i64 [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x i64> %broadcast.splatinsert1, <2 x i64> poison, <2 x i32> zeroinitializer
+; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; FORCED-NEXT: [[IV_0:%.]] = add i32 %index, 0
; FORCED-NEXT: [[GEP:%.+]] = getelementptr i64, ptr %dst, i32 [[IV_0]]
-; FORCED-NEXT: [[ADD:%.+]] = add <2 x i64> %broadcast.splat, %broadcast.splat2
; FORCED-NEXT: [[GEP2:%.+]] = getelementptr i64, ptr [[GEP]], i32 0
; FORCED-NEXT: store <2 x i64> [[ADD]], ptr [[GEP2]], align 4
; FORCED-NEXT: %index.next = add nuw i32 %index, 2
@@ -68,9 +68,9 @@ declare float @powf(float, float) readnone nounwind
; FORCED-LABEL: define void @test_getVectorCallCost
; FORCED: [[E1:%.+]] = extractvalue { float, float } %sv, 0
+; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x float> poison, float [[E1]], i64 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x float> %broadcast.splatinsert, <2 x float> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[E2:%.+]] = extractvalue { float, float } %sv, 1
; FORCED-NEXT: %broadcast.splatinsert1 = insertelement <2 x float> poison, float [[E2]], i64 0
; FORCED-NEXT: %broadcast.splat2 = shufflevector <2 x float> %broadcast.splatinsert1, <2 x float> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 47c0949cee59eb..e6c620018b7db4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -23,16 +23,32 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; DEFAULT-NEXT: [[TMP13:%.*]] = add i64 [[Y]], 1
; DEFAULT-NEXT: [[TMP14:%.*]] = add i64 [[Y]], 1
+; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP13]]
+; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP14]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT6]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; DEFAULT-NEXT: [[TMP26:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; DEFAULT-NEXT: [[TMP31:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; DEFAULT-NEXT: [[TMP32:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP31]]
+; DEFAULT-NEXT: [[TMP34:%.*]] = or <vscale x 4 x i32> [[TMP26]], [[TMP32]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT8]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP39:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]]
; DEFAULT-NEXT: [[TMP40:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT9]], [[BROADCAST_SPLAT7]]
+; DEFAULT-NEXT: [[TMP41:%.*]] = and <vscale x 4 x i32> [[TMP39]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; DEFAULT-NEXT: [[TMP42:%.*]] = and <vscale x 4 x i32> [[TMP40]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; DEFAULT-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i32> [[TMP41]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; DEFAULT-NEXT: [[TMP44:%.*]] = xor <vscale x 4 x i32> [[TMP42]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; DEFAULT-NEXT: [[TMP45:%.*]] = zext <vscale x 4 x i32> [[TMP43]] to <vscale x 4 x i64>
+; DEFAULT-NEXT: [[TMP46:%.*]] = zext <vscale x 4 x i32> [[TMP44]] to <vscale x 4 x i64>
+; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <vscale x 4 x i64> [[TMP45]], i32 0
+; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP61]]
+; DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP62]], i64 0
+; DEFAULT-NEXT: [[TMP47:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; DEFAULT-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP62]], i64 0
+; DEFAULT-NEXT: [[TMP48:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT3]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 4
; DEFAULT-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], 1
@@ -48,16 +64,11 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP13]]
-; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP14]]
-; DEFAULT-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 4
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+; DEFAULT-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP15]], align 4
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP18]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT5]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[BROADCAST_SPLAT]], i32 -1)
-; DEFAULT-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x i32> [[BROADCAST_SPLAT5]], i32 -1)
+; DEFAULT-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[BROADCAST_SPLAT5]], i32 -1)
+; DEFAULT-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], <vscale x 4 x i32> [[BROADCAST_SPLAT5]], i32 -1)
; DEFAULT-NEXT: [[TMP21:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR2]], <vscale x 4 x i32> [[TMP19]], i32 -1)
; DEFAULT-NEXT: [[TMP22:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[TMP20]], i32 -1)
; DEFAULT-NEXT: [[TMP23:%.*]] = or <vscale x 4 x i32> [[TMP21]], [[BROADCAST_SPLAT7]]
@@ -66,20 +77,10 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT-NEXT: [[TMP28:%.*]] = shl <vscale x 4 x i32> [[TMP24]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; DEFAULT-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP27]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; DEFAULT-NEXT: [[TMP30:%.*]] = or <vscale x 4 x i32> [[TMP28]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP31]]
-; DEFAULT-NEXT: [[TMP34:%.*]] = or <vscale x 4 x i32> [[TMP26]], [[TMP32]]
; DEFAULT-NEXT: [[TMP35:%.*]] = or <vscale x 4 x i32> [[TMP33]], [[TMP29]]
; DEFAULT-NEXT: [[TMP36:%.*]] = or <vscale x 4 x i32> [[TMP34]], [[TMP30]]
; DEFAULT-NEXT: [[TMP37:%.*]] = or <vscale x 4 x i32> [[TMP35]], [[BROADCAST_SPLAT7]]
; DEFAULT-NEXT: [[TMP38:%.*]] = or <vscale x 4 x i32> [[TMP36]], [[BROADCAST_SPLAT7]]
-; DEFAULT-NEXT: [[TMP41:%.*]] = and <vscale x 4 x i32> [[TMP39]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; DEFAULT-NEXT: [[TMP42:%.*]] = and <vscale x 4 x i32> [[TMP40]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; DEFAULT-NEXT: [[TMP43:%.*]] = xor <vscale x 4 x i32> [[TMP41]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; DEFAULT-NEXT: [[TMP44:%.*]] = xor <vscale x 4 x i32> [[TMP42]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; DEFAULT-NEXT: [[TMP45:%.*]] = zext <vscale x 4 x i32> [[TMP43]] to <vscale x 4 x i64>
-; DEFAULT-NEXT: [[TMP46:%.*]] = zext <vscale x 4 x i32> [[TMP44]] to <vscale x 4 x i64>
-; DEFAULT-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP45]]
-; DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP46]]
; DEFAULT-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP47]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
; DEFAULT-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP48]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
; DEFAULT-NEXT: [[TMP49:%.*]] = lshr <vscale x 4 x i32> [[TMP37]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
@@ -168,13 +169,22 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]])
; PRED-NEXT: [[TMP19:%.*]] = add i64 [[Y]], 1
+; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP19]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP25:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; PRED-NEXT: [[TMP28:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT4]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; PRED-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP28]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP32:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]]
+; PRED-NEXT: [[TMP33:%.*]] = and <vscale x 4 x i32> [[TMP32]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; PRED-NEXT: [[TMP34:%.*]] = xor <vscale x 4 x i32> [[TMP33]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; PRED-NEXT: [[TMP35:%.*]] = zext <vscale x 4 x i32> [[TMP34]] to <vscale x 4 x i64>
+; PRED-NEXT: [[TMP46:%.*]] = extractelement <vscale x 4 x i64> [[TMP35]], i32 0
+; PRED-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[TMP46]]
+; PRED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP47]], i64 0
+; PRED-NEXT: [[TMP36:%.*]] = shufflevector <vscale x 4 x ptr> [[DOTSPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; PRED-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 4
; PRED-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1
@@ -190,7 +200,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[VECTOR_RECUR2:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT1]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[VECTOR_BODY]] ]
-; PRED-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP19]]
; PRED-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP21]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
@@ -199,13 +208,8 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP24:%.*]] = or <vscale x 4 x i32> [[TMP23]], [[BROADCAST_SPLAT4]]
; PRED-NEXT: [[TMP26:%.*]] = shl <vscale x 4 x i32> [[TMP24]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; PRED-NEXT: [[TMP27:%.*]] = or <vscale x 4 x i32> [[TMP26]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; PRED-NEXT: [[TMP29:%.*]] = or <vscale x 4 x i32> [[TMP25]], [[TMP28]]
; PRED-NEXT: [[TMP30:%.*]] = or <vscale x 4 x i32> [[TMP29]], [[TMP27]]
; PRED-NEXT: [[TMP31:%.*]] = or <vscale x 4 x i32> [[TMP30]], [[BROADCAST_SPLAT4]]
-; PRED-NEXT: [[TMP33:%.*]] = and <vscale x 4 x i32> [[TMP32]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; PRED-NEXT: [[TMP34:%.*]] = xor <vscale x 4 x i32> [[TMP33]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; PRED-NEXT: [[TMP35:%.*]] = zext <vscale x 4 x i32> [[TMP34]] to <vscale x 4 x i64>
-; PRED-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP35]]
; PRED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP36]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
; PRED-NEXT: [[TMP37:%.*]] = lshr <vscale x 4 x i32> [[TMP31]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; PRED-NEXT: [[TMP38:%.*]] = zext <vscale x 4 x i32> [[TMP37]] to <vscale x 4 x i64>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index c8114e3d28f4e2..7778f01c58dc34 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -5,13 +5,13 @@ target triple = "aarch64-unknown-linux-gnu"
define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-LABEL: @widen_extractvalue(
; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0
+; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { i64, i64 } [[SV]], 1
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT1]], i64 0
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK: vector.body:
; CHECK: [[ADD:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT2]]
+; CHECK: vector.body:
entry:
br label %loop.body
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 1990590252ea6c..f63b1b73dc522b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -10,6 +10,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 48, i64 48, i64 48>
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], <i64 52, i64 52, i64 52, i64 52>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
@@ -20,8 +22,6 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], <i64 52, i64 52, i64 52, i64 52>
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], <i32 8, i32 8, i32 8, i32 8>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index 315064a474a2f6..fa13cd8f19ae56 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -162,6 +162,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> <i1 true, i1 true>, [[TMP0]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -172,7 +173,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[VEC_IV]], i32 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 [[TMP1]], i32 2)
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> <i1 true, i1 true>, [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
@@ -226,30 +226,18 @@ define i8 @icmp_ops_narrowed_to_i1() #1 {
; CHECK-LABEL: define i8 @icmp_ops_narrowed_to_i1(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
-; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 0, 0
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[C]] to i64
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[EXT]], 1
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i8
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i8 [ [[TRUNC]], %[[LOOP]] ], [ 0, %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i8 [ [[TRUNC]], %[[LOOP]] ]
; CHECK-NEXT: ret i8 [[TRUNC_LCSSA]]
;
entry:
@@ -289,23 +277,23 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[N]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[T]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT4]], i32 8, <vscale x 2 x i1> [[TMP8]], <vscale x 2 x double> poison)
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[WIDE_MASKED_GATHER]], <vscale x 2 x ptr> [[BROADCAST_SPLAT6]], i32 8, <vscale x 2 x i1> [[TMP8]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -326,7 +314,7 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[V]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -370,6 +358,4 @@ attributes #1 = { "target-features"="+64bit,+v" }
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index b9a6235db6fa60..5e8f287217478b 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -10,13 +10,6 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], <i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24, i40 24>
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], <i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28, i40 28>
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
@@ -24,6 +17,13 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
@@ -34,105 +34,105 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP0]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP0]], i32 4
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP0]], i32 5
; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[TMP0]], i32 6
; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP0]], i32 7
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
; CHECK: pred.store.if15:
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[TMP0]], i32 8
; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP0]], i32 9
; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[TMP0]], i32 10
; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
; CHECK: pred.store.if21:
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP0]], i32 11
; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
; CHECK: pred.store.if23:
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue24:
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[TMP0]], i32 12
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
; CHECK: pred.store.if25:
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.continue26:
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP0]], i32 13
; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
; CHECK: pred.store.if27:
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
; CHECK: pred.store.continue28:
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[TMP0]], i32 14
; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
; CHECK: pred.store.if29:
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
; CHECK: pred.store.continue30:
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP0]], i32 15
; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
; CHECK: pred.store.if31:
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
; CHECK: pred.store.continue32:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index f85f5bd0e27afc..f1f285eda38a6b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -799,38 +799,34 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI4]], <i64 1, i64 1>
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]])
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP17]])
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP18]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP9]], [[BIN_RDX]]
; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <2 x i64> [[TMP10]], [[BIN_RDX5]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX6]])
+; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX6]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP22]] = add i64 [[TMP21]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP18]] = add i64 [[TMP17]], 1
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[N]], 0
; CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
@@ -838,7 +834,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[GEP]], [[END]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP22]], [[LOOP]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP18]], [[LOOP]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[DOTLCSSA]]
;
entry:
@@ -869,23 +865,23 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i64> [[TMP0]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP2]] to <4 x i32>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP0]], <i64 12, i64 12, i64 12, i64 12>
-; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i64> [[TMP0]], <i64 12, i64 12, i64 12, i64 12>
-; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
-; CHECK-NEXT: [[TMP11]] = and <4 x i32> [[VEC_PHI]], [[TMP9]]
-; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP10]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 4
+; CHECK-NEXT: [[TMP11]] = and <4 x i32> [[VEC_PHI]], [[TMP3]]
+; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -1013,14 +1009,14 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[Y:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT2]], [[TMP1]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64>
-; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT2]], [[TMP1]]
; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index 7566844ff39df8..229bdc341a2c34 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -20,6 +20,9 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
; CHECK-NEXT: [[TMP17:%.*]] = sdiv i64 [[M]], [[CONV6]]
+; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP17]] to i32
+; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP17]], [[CONV61]]
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[X]], [[TMP19]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -30,11 +33,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP17]] to i32
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP17]], [[CONV61]]
; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP5]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
-; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[X]], [[TMP19]]
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[TMP24]] to i64
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP25]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index 7d278988c53f85..fda7655cd9cf86 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -118,17 +118,14 @@ define i32 @uniform_address(ptr align(4) %addr, i32 %byte_offset) {
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[BYTE_OFFSET]], 4
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[BYTE_OFFSET]], 4
; CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[BYTE_OFFSET]], 4
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ADDR]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[ADDR]], i32 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[ADDR]], i32 [[TMP3]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -146,7 +143,7 @@ define i32 @uniform_address(ptr align(4) %addr, i32 %byte_offset) {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 4096
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: loopexit:
-; CHECK-NEXT: [[LOAD_LCSSA:%.*]] = phi i32 [ [[LOAD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[LOAD_LCSSA:%.*]] = phi i32 [ [[LOAD]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[LOAD_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
index 1694de018c53be..048d18ea8422d0 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
@@ -70,15 +70,15 @@ define i32 @test_icmp_and_op_zext(ptr %dst, i64 %a) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[AND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 1, [[DOTCAST]]
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i64>
-; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[BROADCAST_SPLAT]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i8>
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i16 [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
; CHECK-NEXT: store <4 x i8> [[TMP4]], ptr [[TMP6]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
index 8b24f9993de705..cae6ece9f9d8c9 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
@@ -12,12 +12,12 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
@@ -72,12 +72,12 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
@@ -132,12 +132,12 @@ define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index c6c834b5382f75..b3ec3e8f0f3c63 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -179,9 +179,9 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP11]], align 8
-; CHECK-NEXT: store ptr [[TMP81]], ptr [[TMP13]], align 8
-; CHECK-NEXT: store ptr [[TMP82]], ptr [[TMP15]], align 8
-; CHECK-NEXT: store ptr [[TMP83]], ptr [[TMP17]], align 8
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
More information about the llvm-commits
mailing list