[llvm] 95e5d40 - [SLP]Improve splats vectorization.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 30 10:19:02 PDT 2021
Author: Alexey Bataev
Date: 2021-07-30T10:17:45-07:00
New Revision: 95e5d401ae6c773e9f531f9c6ce62faf03dcd394
URL: https://github.com/llvm/llvm-project/commit/95e5d401ae6c773e9f531f9c6ce62faf03dcd394
DIFF: https://github.com/llvm/llvm-project/commit/95e5d401ae6c773e9f531f9c6ce62faf03dcd394.diff
LOG: [SLP]Improve splats vectorization.
Replace insertelement instructions for splats with just single
insertelement + broadcast shuffle. Also, try to merge these instructions
if they come from the same/shuffled gather node.
Differential Revision: https://reviews.llvm.org/D107104
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
llvm/test/Transforms/SLPVectorizer/X86/cse.ll
llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
llvm/test/Transforms/SLPVectorizer/X86/resched.ll
llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 839162b99ace3..10dcf6e1caca1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5521,11 +5521,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}).base());
VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
int UniqueVals = 0;
- bool HasUndefs = false;
for (Value *V : VL.drop_back(VL.size() - VF)) {
if (isa<UndefValue>(V)) {
ReuseShuffleIndicies.emplace_back(UndefMaskElem);
- HasUndefs = true;
continue;
}
if (isConstant(V)) {
@@ -5540,15 +5538,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
++UniqueVals;
}
}
- if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
+ if (UniqueVals == 1 && UniqueValues.size() == 1) {
// Emit pure splat vector.
- // FIXME: why it is not identified as an identity.
- unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
- if (NumUndefs == ReuseShuffleIndicies.size() - 1)
- ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
- UndefMaskElem);
- else
- ReuseShuffleIndicies.assign(VF, 0);
+ ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
+ UndefMaskElem);
} else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
ReuseShuffleIndicies.clear();
UniqueValues.clear();
@@ -6398,7 +6391,8 @@ void BoUpSLP::optimizeGatherSequence() {
Instruction *In = &*it++;
if (isDeleted(In))
continue;
- if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
+ !isa<ShuffleVectorInst>(In))
continue;
// Check if we can replace this instruction with any of the
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
index 61c87a7f861d5..f4b027086265f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
@@ -22,23 +22,19 @@ define void @f_noalias(i8* noalias nocapture %dst, i8* noalias nocapture readonl
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <4 x i32> [[TMP14]], <i32 256, i32 256, i32 256, i32 256>
-; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = sext <4 x i1> [[TMP16]] to <4 x i32>
-; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP14]], <4 x i32> [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i8>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP6]], [[SHUFFLE1]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], <i32 256, i32 256, i32 256, i32 256>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP12]] to <4 x i8>
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[DST]] to <4 x i8>*
-; CHECK-NEXT: store <4 x i8> [[TMP19]], <4 x i8>* [[TMP20]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[DST]] to <4 x i8>*
+; CHECK-NEXT: store <4 x i8> [[TMP13]], <4 x i8>* [[TMP14]], align 1
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
index 374f240a1ac2d..de9417759dd62 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
@@ -204,16 +204,10 @@ define void @select_uniform_ugt_8xi8(i8* %ptr, i8 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
-; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
+; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -280,23 +274,17 @@ define void @select_uniform_ugt_16xi8(i8* %ptr, i8 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i8> [[TMP3]], i8 [[X]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i8> [[TMP4]], i8 [[X]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i8> [[TMP5]], i8 [[X]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[TMP6]], i8 [[X]], i32 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[X]], i32 5
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i8> [[TMP8]], i8 [[X]], i32 6
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[X]], i32 7
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
-; CHECK-NEXT: store <8 x i8> [[TMP11]], <8 x i8>* [[TMP12]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i8> [[TMP1]], <8 x i8> [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[PTR]] to <8 x i8>*
+; CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[TMP5]], align 2
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 8
; CHECK-NEXT: [[L_8:%.*]] = load i8, i8* [[GEP_8]], align 1
; CHECK-NEXT: [[CMP_8:%.*]] = icmp ugt i8 [[L_8]], -1
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
-; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP13]], i8 [[X]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i8> [[TMP11]], i32 0
-; CHECK-NEXT: store i8 [[TMP14]], i8* [[GEP_8]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i8> [[TMP1]], i32 0
+; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP6]], i8 [[X]]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0
+; CHECK-NEXT: store i8 [[TMP7]], i8* [[GEP_8]], align 2
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i8 9
; CHECK-NEXT: [[L_9:%.*]] = load i8, i8* [[GEP_9]], align 1
; CHECK-NEXT: [[CMP_9:%.*]] = icmp ugt i8 [[L_9]], -1
@@ -444,12 +432,10 @@ define void @select_uniform_ugt_4xi16(i16* %ptr, i16 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[X]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[X]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> [[TMP5]], i16 [[X]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
-; CHECK-NEXT: store <4 x i16> [[TMP7]], <4 x i16>* [[TMP8]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[TMP4]], <4 x i16>* [[TMP5]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -493,16 +479,10 @@ define void @select_uniform_ult_8xi16(i16* %ptr, i16 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[X]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[X]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[X]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[X]], i32 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i16> [[TMP7]], i16 [[X]], i32 5
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i16> [[TMP8]], i16 [[X]], i32 6
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i16> [[TMP9]], i16 [[X]], i32 7
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP11]], <8 x i16>* [[TMP12]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* [[TMP5]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -594,12 +574,10 @@ define void @select_uniform_eq_4xi32(i32* %ptr, i32 %x) {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[X]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[X]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> [[SHUFFLE]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index 36f5453e08afc..067049c262932 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -491,8 +491,7 @@ define void @first_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2)
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 1, i32 2>
-; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE2]]
; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
; CHECK-NEXT: [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP22]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
index 7c85088c2e6bb..7f51dcae484ca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -8,21 +8,19 @@ define void @test() #0 {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_EXTRA1:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP6:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], <i64 3, i64 2, i64 1, i64 0>
-; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
-; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP9]])
-; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
-; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP6]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], <i64 3, i64 2, i64 1, i64 0>
+; CHECK-NEXT: [[TMP3]] = extractelement <4 x i64> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i32 0
+; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP4]], 32
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP2]]
+; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP7]], 0
+; CHECK-NEXT: [[OP_EXTRA1]] = add i64 [[OP_EXTRA]], [[TMP3]]
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
index 2961cd2faf149..25b0f0caec213 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bad-reduction.ll
@@ -546,10 +546,8 @@ define void @PR47450(i16* nocapture readonly %p) {
; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X]] to i32
; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i32 [[Z]], 1
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[S]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[S]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[S]], i32 3
-; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([8 x i32]* @output to <4 x i32>*), align 16
; CHECK-NEXT: ret void
;
%x = load i16, i16* %p, align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index 458861ef26041..c0f453f296641 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -9,15 +9,13 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
; CHECK-NEXT: ret i32 undef
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
index 4dc807039d6ee..03717ad13d82f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
@@ -18,20 +18,16 @@ define void @bcast_vals(i64 *%A, i64 *%B, i64 *%S) {
; CHECK-NEXT: [[V1:%.*]] = sub i64 [[A0]], 1
; CHECK-NEXT: [[V2:%.*]] = sub i64 [[B0]], 1
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V1]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[V1]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[V1]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[V1]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[V2]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[V2]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[V2]], i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[TMP3]], [[TMP7]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[V2]], i32 0
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]]
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 1
; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 2
; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 3
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
-; CHECK-NEXT: store <4 x i64> [[TMP8]], <4 x i64>* [[TMP9]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
+; CHECK-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -81,16 +77,14 @@ define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[D0]], i32 3
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[V1]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V1]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[V1]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[SHUFFLE]], [[TMP4]]
; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1
; CHECK-NEXT: [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2
; CHECK-NEXT: [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 8
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
index a8e19984499ad..f7c0b92d10caf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -52,26 +52,12 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
;
; AVX-LABEL: @splat(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
-; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[C]], i32 1
-; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[C]], i32 2
-; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[C]], i32 3
-; AVX-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 [[C]], i32 4
-; AVX-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[C]], i32 5
-; AVX-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 [[C]], i32 6
-; AVX-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 [[C]], i32 7
-; AVX-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[C]], i32 8
-; AVX-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP9]], i8 [[C]], i32 9
-; AVX-NEXT: [[TMP11:%.*]] = insertelement <16 x i8> [[TMP10]], i8 [[C]], i32 10
-; AVX-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> [[TMP11]], i8 [[C]], i32 11
-; AVX-NEXT: [[TMP13:%.*]] = insertelement <16 x i8> [[TMP12]], i8 [[C]], i32 12
-; AVX-NEXT: [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13
-; AVX-NEXT: [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14
-; AVX-NEXT: [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15
-; AVX-NEXT: [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
-; AVX-NEXT: [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[B:%.*]], i32 1
-; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-; AVX-NEXT: [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]]
-; AVX-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
+; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> zeroinitializer
+; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+; AVX-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 [[B:%.*]], i32 1
+; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; AVX-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[SHUFFLE]], [[SHUFFLE1]]
+; AVX-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
; AVX-NEXT: ret void
;
%1 = xor i8 %c, %a
@@ -130,19 +116,15 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
;
; AVX-LABEL: @same_opcode_on_one_side(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
-; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C]], i32 1
-; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[C]], i32 2
-; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3
-; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
-; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
-; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
-; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3
-; AVX-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP4]], [[TMP8]]
-; AVX-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1
-; AVX-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2
-; AVX-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3
-; AVX-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]]
-; AVX-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
+; AVX-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
+; AVX-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; AVX-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1
+; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3
+; AVX-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]]
+; AVX-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
; AVX-NEXT: ret void
;
%add1 = add i32 %c, %a
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index d3a5b0d94b597..a6d6dc2c1a5b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -75,13 +75,11 @@ define i32 @foo(double* nocapture %A, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 7.900000e+00, double 7.700000e+00, double 7.600000e+00, double 7.400000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], <double 6.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>*
-; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 6.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>*
+; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
@@ -209,13 +207,11 @@ define i32 @foo4(double* nocapture %A, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 7.900000e+00, double 7.900000e+00, double 7.900000e+00, double 7.900000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>*
-; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[SHUFFLE]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[A]] to <4 x double>*
+; CHECK-NEXT: store <4 x double> [[TMP5]], <4 x double>* [[TMP6]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
index 689ac5be4eac0..554170236184b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -24,13 +24,11 @@ define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT: ret i32 0
;
entry:
@@ -77,15 +75,13 @@ define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: ret i32 [[TMP8]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: ret i32 [[TMP5]]
;
entry:
%0 = load i32, i32* %A, align 4
@@ -123,15 +119,13 @@ define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[MUL238]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[SHUFFLE]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
-; CHECK-NEXT: ret i32 [[TMP8]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: ret i32 [[TMP5]]
;
entry:
%0 = load i32, i32* %A, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
index 4eeebdbb28711..830b882dac096 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond_broadcast.ll
@@ -8,13 +8,11 @@ define i32 @diamond_broadcast(i32* noalias nocapture %B, i32* noalias nocapture
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[LD]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[LD]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[LD]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LD]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP3]], [[TMP3]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[SHUFFLE]], [[SHUFFLE]]
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[B]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 7c5bbc775cb97..62e1dece673cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -8,18 +8,16 @@ define i32 @foo(i32* nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[N]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[N]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[N]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 9, i32 9, i32 9, i32 9>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
-; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP9]], [[M:%.*]]
-; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP9]], [[M]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]]
+; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]]
; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]]
; CHECK-NEXT: ret i32 [[ADD10]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index 33b4f7f706fe4..caa960eeeb454 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -1253,40 +1253,32 @@ define i32 @wobble(i32 %arg, i32 %bar) {
; CHECK-LABEL: @wobble(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
-; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]])
-; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
-; CHECK-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
-; CHECK-NEXT: ret i32 [[OP_EXTRA1]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
+; CHECK-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
+; CHECK-NEXT: ret i32 [[OP_EXTRA2]]
;
; THRESHOLD-LABEL: @wobble(
; THRESHOLD-NEXT: bb:
; THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[ARG:%.*]], i32 0
-; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[ARG]], i32 1
-; THRESHOLD-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[ARG]], i32 2
-; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ARG]], i32 3
-; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
-; THRESHOLD-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[BAR]], i32 1
-; THRESHOLD-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[BAR]], i32 2
-; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[BAR]], i32 3
-; THRESHOLD-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP3]], [[TMP7]]
-; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
-; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
-; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
-; THRESHOLD-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP11]])
-; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
-; THRESHOLD-NEXT: [[OP_EXTRA1:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
-; THRESHOLD-NEXT: ret i32 [[OP_EXTRA1]]
+; THRESHOLD-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; THRESHOLD-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[BAR:%.*]], i32 0
+; THRESHOLD-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; THRESHOLD-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer
+; THRESHOLD-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
+; THRESHOLD-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP6]], [[ARG]]
+; THRESHOLD-NEXT: [[OP_EXTRA2:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP3]]
+; THRESHOLD-NEXT: ret i32 [[OP_EXTRA2]]
;
bb:
%x1 = xor i32 %arg, %bar
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 4fa0e576cb5b0..7be473214244d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -25,21 +25,13 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-LABEL: @bar(
; SSE-NEXT: entry:
; SSE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[W:%.*]], i32 0
-; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[W]], i32 1
-; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[W]], i32 2
-; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[W]], i32 3
-; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
-; SSE-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[W]], i32 1
-; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[W]], i32 2
-; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[W]], i32 3
-; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
-; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[W]], i32 1
-; SSE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[W]], i32 2
-; SSE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[W]], i32 3
-; SSE-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
-; SSE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[W]], i32 1
-; SSE-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[W]], i32 2
-; SSE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[W]], i32 3
+; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
+; SSE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
+; SSE-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
+; SSE-NEXT: [[SHUFFLE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT: br label [[FOR_BODY:%.*]]
; SSE: for.body:
; SSE-NEXT: [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -59,25 +51,25 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; SSE-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; SSE-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
-; SSE-NEXT: [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>*
-; SSE-NEXT: [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
+; SSE-NEXT: [[TMP4:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>*
+; SSE-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
; SSE-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
-; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>*
-; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
+; SSE-NEXT: [[TMP6:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>*
+; SSE-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
; SSE-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
-; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>*
-; SSE-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
+; SSE-NEXT: [[TMP8:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>*
+; SSE-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
; SSE-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
-; SSE-NEXT: [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>*
-; SSE-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
-; SSE-NEXT: [[TMP24:%.*]] = icmp ult <4 x i8> [[TMP17]], [[TMP19]]
-; SSE-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i8> [[TMP23]], <4 x i8> [[TMP21]]
-; SSE-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32>
-; SSE-NEXT: [[TMP27:%.*]] = mul <4 x i32> [[TMP26]], [[TMP3]]
-; SSE-NEXT: [[TMP28:%.*]] = trunc <4 x i32> [[TMP27]] to <4 x i8>
+; SSE-NEXT: [[TMP10:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>*
+; SSE-NEXT: [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
+; SSE-NEXT: [[TMP12:%.*]] = icmp ult <4 x i8> [[TMP5]], [[TMP7]]
+; SSE-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i8> [[TMP11]], <4 x i8> [[TMP9]]
+; SSE-NEXT: [[TMP14:%.*]] = zext <4 x i8> [[TMP13]] to <4 x i32>
+; SSE-NEXT: [[TMP15:%.*]] = mul <4 x i32> [[TMP14]], [[SHUFFLE]]
+; SSE-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i8>
; SSE-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
-; SSE-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>*
-; SSE-NEXT: store <4 x i8> [[TMP28]], <4 x i8>* [[TMP29]], align 1
+; SSE-NEXT: [[TMP17:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>*
+; SSE-NEXT: store <4 x i8> [[TMP16]], <4 x i8>* [[TMP17]], align 1
; SSE-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; SSE-NEXT: [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; SSE-NEXT: [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
@@ -94,25 +86,25 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; SSE-NEXT: [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; SSE-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
-; SSE-NEXT: [[TMP30:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>*
-; SSE-NEXT: [[TMP31:%.*]] = load <4 x i8>, <4 x i8>* [[TMP30]], align 1
+; SSE-NEXT: [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>*
+; SSE-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; SSE-NEXT: [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
-; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>*
-; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
+; SSE-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>*
+; SSE-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
; SSE-NEXT: [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
-; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>*
-; SSE-NEXT: [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1
+; SSE-NEXT: [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>*
+; SSE-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
; SSE-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
-; SSE-NEXT: [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>*
-; SSE-NEXT: [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1
-; SSE-NEXT: [[TMP38:%.*]] = icmp ult <4 x i8> [[TMP31]], [[TMP33]]
-; SSE-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP38]], <4 x i8> [[TMP37]], <4 x i8> [[TMP35]]
-; SSE-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i32>
-; SSE-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP7]]
-; SSE-NEXT: [[TMP42:%.*]] = trunc <4 x i32> [[TMP41]] to <4 x i8>
+; SSE-NEXT: [[TMP24:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>*
+; SSE-NEXT: [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
+; SSE-NEXT: [[TMP26:%.*]] = icmp ult <4 x i8> [[TMP19]], [[TMP21]]
+; SSE-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> [[TMP25]], <4 x i8> [[TMP23]]
+; SSE-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
+; SSE-NEXT: [[TMP29:%.*]] = mul <4 x i32> [[TMP28]], [[SHUFFLE1]]
+; SSE-NEXT: [[TMP30:%.*]] = trunc <4 x i32> [[TMP29]] to <4 x i8>
; SSE-NEXT: [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
-; SSE-NEXT: [[TMP43:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>*
-; SSE-NEXT: store <4 x i8> [[TMP42]], <4 x i8>* [[TMP43]], align 1
+; SSE-NEXT: [[TMP31:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>*
+; SSE-NEXT: store <4 x i8> [[TMP30]], <4 x i8>* [[TMP31]], align 1
; SSE-NEXT: [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; SSE-NEXT: [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; SSE-NEXT: [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
@@ -129,25 +121,25 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; SSE-NEXT: [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; SSE-NEXT: [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
-; SSE-NEXT: [[TMP44:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>*
-; SSE-NEXT: [[TMP45:%.*]] = load <4 x i8>, <4 x i8>* [[TMP44]], align 1
+; SSE-NEXT: [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>*
+; SSE-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
; SSE-NEXT: [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
-; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>*
-; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1
+; SSE-NEXT: [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>*
+; SSE-NEXT: [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1
; SSE-NEXT: [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
-; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>*
-; SSE-NEXT: [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1
+; SSE-NEXT: [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>*
+; SSE-NEXT: [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1
; SSE-NEXT: [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
-; SSE-NEXT: [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>*
-; SSE-NEXT: [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1
-; SSE-NEXT: [[TMP52:%.*]] = icmp ult <4 x i8> [[TMP45]], [[TMP47]]
-; SSE-NEXT: [[TMP53:%.*]] = select <4 x i1> [[TMP52]], <4 x i8> [[TMP51]], <4 x i8> [[TMP49]]
-; SSE-NEXT: [[TMP54:%.*]] = zext <4 x i8> [[TMP53]] to <4 x i32>
-; SSE-NEXT: [[TMP55:%.*]] = mul <4 x i32> [[TMP54]], [[TMP11]]
-; SSE-NEXT: [[TMP56:%.*]] = trunc <4 x i32> [[TMP55]] to <4 x i8>
+; SSE-NEXT: [[TMP38:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>*
+; SSE-NEXT: [[TMP39:%.*]] = load <4 x i8>, <4 x i8>* [[TMP38]], align 1
+; SSE-NEXT: [[TMP40:%.*]] = icmp ult <4 x i8> [[TMP33]], [[TMP35]]
+; SSE-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP40]], <4 x i8> [[TMP39]], <4 x i8> [[TMP37]]
+; SSE-NEXT: [[TMP42:%.*]] = zext <4 x i8> [[TMP41]] to <4 x i32>
+; SSE-NEXT: [[TMP43:%.*]] = mul <4 x i32> [[TMP42]], [[SHUFFLE2]]
+; SSE-NEXT: [[TMP44:%.*]] = trunc <4 x i32> [[TMP43]] to <4 x i8>
; SSE-NEXT: [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
-; SSE-NEXT: [[TMP57:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>*
-; SSE-NEXT: store <4 x i8> [[TMP56]], <4 x i8>* [[TMP57]], align 1
+; SSE-NEXT: [[TMP45:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>*
+; SSE-NEXT: store <4 x i8> [[TMP44]], <4 x i8>* [[TMP45]], align 1
; SSE-NEXT: [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; SSE-NEXT: [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; SSE-NEXT: [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
@@ -164,25 +156,25 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; SSE-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; SSE-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; SSE-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
-; SSE-NEXT: [[TMP58:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>*
-; SSE-NEXT: [[TMP59:%.*]] = load <4 x i8>, <4 x i8>* [[TMP58]], align 1
+; SSE-NEXT: [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>*
+; SSE-NEXT: [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1
; SSE-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
-; SSE-NEXT: [[TMP60:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>*
-; SSE-NEXT: [[TMP61:%.*]] = load <4 x i8>, <4 x i8>* [[TMP60]], align 1
+; SSE-NEXT: [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>*
+; SSE-NEXT: [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1
; SSE-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
-; SSE-NEXT: [[TMP62:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>*
-; SSE-NEXT: [[TMP63:%.*]] = load <4 x i8>, <4 x i8>* [[TMP62]], align 1
+; SSE-NEXT: [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>*
+; SSE-NEXT: [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1
; SSE-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
-; SSE-NEXT: [[TMP64:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>*
-; SSE-NEXT: [[TMP65:%.*]] = load <4 x i8>, <4 x i8>* [[TMP64]], align 1
-; SSE-NEXT: [[TMP66:%.*]] = icmp ult <4 x i8> [[TMP59]], [[TMP61]]
-; SSE-NEXT: [[TMP67:%.*]] = select <4 x i1> [[TMP66]], <4 x i8> [[TMP65]], <4 x i8> [[TMP63]]
-; SSE-NEXT: [[TMP68:%.*]] = zext <4 x i8> [[TMP67]] to <4 x i32>
-; SSE-NEXT: [[TMP69:%.*]] = mul <4 x i32> [[TMP68]], [[TMP15]]
-; SSE-NEXT: [[TMP70:%.*]] = trunc <4 x i32> [[TMP69]] to <4 x i8>
+; SSE-NEXT: [[TMP52:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>*
+; SSE-NEXT: [[TMP53:%.*]] = load <4 x i8>, <4 x i8>* [[TMP52]], align 1
+; SSE-NEXT: [[TMP54:%.*]] = icmp ult <4 x i8> [[TMP47]], [[TMP49]]
+; SSE-NEXT: [[TMP55:%.*]] = select <4 x i1> [[TMP54]], <4 x i8> [[TMP53]], <4 x i8> [[TMP51]]
+; SSE-NEXT: [[TMP56:%.*]] = zext <4 x i8> [[TMP55]] to <4 x i32>
+; SSE-NEXT: [[TMP57:%.*]] = mul <4 x i32> [[TMP56]], [[SHUFFLE3]]
+; SSE-NEXT: [[TMP58:%.*]] = trunc <4 x i32> [[TMP57]] to <4 x i8>
; SSE-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
-; SSE-NEXT: [[TMP71:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>*
-; SSE-NEXT: store <4 x i8> [[TMP70]], <4 x i8>* [[TMP71]], align 1
+; SSE-NEXT: [[TMP59:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>*
+; SSE-NEXT: store <4 x i8> [[TMP58]], <4 x i8>* [[TMP59]], align 1
; SSE-NEXT: [[INC]] = add nuw nsw i32 [[I_0356]], 1
; SSE-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; SSE-NEXT: [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
@@ -197,21 +189,7 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; AVX512-LABEL: @bar(
; AVX512-NEXT: entry:
; AVX512-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0
-; AVX512-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1
-; AVX512-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2
-; AVX512-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3
-; AVX512-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[W]], i32 4
-; AVX512-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[W]], i32 5
-; AVX512-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[W]], i32 6
-; AVX512-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[W]], i32 7
-; AVX512-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[W]], i32 8
-; AVX512-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[W]], i32 9
-; AVX512-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[W]], i32 10
-; AVX512-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[W]], i32 11
-; AVX512-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[W]], i32 12
-; AVX512-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[W]], i32 13
-; AVX512-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[W]], i32 14
-; AVX512-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[W]], i32 15
+; AVX512-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT: br label [[FOR_BODY:%.*]]
; AVX512: for.body:
; AVX512-NEXT: [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -291,25 +269,25 @@ define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readon
; AVX512-NEXT: [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; AVX512-NEXT: [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; AVX512-NEXT: [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
-; AVX512-NEXT: [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
-; AVX512-NEXT: [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1
+; AVX512-NEXT: [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
+; AVX512-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX512-NEXT: [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
-; AVX512-NEXT: [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
-; AVX512-NEXT: [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1
+; AVX512-NEXT: [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
+; AVX512-NEXT: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
; AVX512-NEXT: [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
-; AVX512-NEXT: [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
-; AVX512-NEXT: [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1
+; AVX512-NEXT: [[TMP5:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
+; AVX512-NEXT: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 1
; AVX512-NEXT: [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
-; AVX512-NEXT: [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
-; AVX512-NEXT: [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1
-; AVX512-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]]
-; AVX512-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]]
-; AVX512-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32>
-; AVX512-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]]
-; AVX512-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8>
+; AVX512-NEXT: [[TMP7:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
+; AVX512-NEXT: [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1
+; AVX512-NEXT: [[TMP9:%.*]] = icmp ult <16 x i8> [[TMP2]], [[TMP4]]
+; AVX512-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP6]]
+; AVX512-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32>
+; AVX512-NEXT: [[TMP12:%.*]] = mul <16 x i32> [[TMP11]], [[SHUFFLE]]
+; AVX512-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
; AVX512-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
-; AVX512-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
-; AVX512-NEXT: store <16 x i8> [[TMP28]], <16 x i8>* [[TMP29]], align 1
+; AVX512-NEXT: [[TMP14:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
+; AVX512-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP14]], align 1
; AVX512-NEXT: [[INC]] = add nuw nsw i32 [[I_0356]], 1
; AVX512-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; AVX512-NEXT: [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index 748d7819a4f25..4390877d148a1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -29,19 +29,13 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, <8 x i32>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> poison, i32 [[N]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[N]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[N]], i32 2
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[N]], i32 3
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP12]], i32 [[N]], i32 4
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7
-; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP8]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
-; CHECK-NEXT: store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <8 x i32> [[TMP8]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>*
+; CHECK-NEXT: store <8 x i32> [[TMP10]], <8 x i32>* [[TMP11]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8
-; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], [[N]]
+; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
index 084b1d230daf5..4219082465d0f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -16,20 +16,18 @@ define i32 @foo(i32* nocapture %A, i32 %n) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[N:%.*]], 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 7, i32 8, i32 9, i32 10>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[A]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP1]], 11
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[TMP11]], [[TMP13]]
-; CHECK-NEXT: store i32 [[TMP14]], i32* [[TMP12]], align 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 7, i32 8, i32 9, i32 10>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP1]], 11
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP8]], [[TMP10]]
+; CHECK-NEXT: store i32 [[TMP11]], i32* [[TMP9]], align 4
; CHECK-NEXT: ret i32 undef
;
%1 = mul nsw i32 %n, 5
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index 7bfee28d0310a..9e3a886169a3f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -32,7 +32,7 @@ define void @fextr(i16* %ptr) {
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 29460998d3b19..d3da0c9572028 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -20,43 +20,35 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
; CHECK-NEXT: [[ARRAYIDX_I_I7_6_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 6
; CHECK-NEXT: [[ARRAYIDX_I_I7_7_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 7
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[CONV31_I]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[CONV31_I]], i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[CONV31_I]], i32 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[CONV31_I]], i32 5
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[CONV31_I]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[CONV31_I]], i32 7
-; CHECK-NEXT: [[TMP9:%.*]] = lshr <8 x i32> [[TMP8]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[SHUFFLE]], <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT: [[ARRAYIDX_I_I7_8_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 8
; CHECK-NEXT: [[ARRAYIDX_I_I7_9_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 9
; CHECK-NEXT: [[ARRAYIDX_I_I7_10_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 10
; CHECK-NEXT: [[ARRAYIDX_I_I7_11_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 11
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[CONV31_I]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[CONV31_I]], i32 3
-; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP13]], <i32 9, i32 10, i32 11, i32 12>
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[SHUFFLE1]], <i32 9, i32 10, i32 11, i32 12>
; CHECK-NEXT: [[ARRAYIDX_I_I7_12_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 12
; CHECK-NEXT: [[SHR_12_I_I:%.*]] = lshr i32 [[CONV31_I]], 13
; CHECK-NEXT: [[ARRAYIDX_I_I7_13_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 13
; CHECK-NEXT: [[ARRAYIDX_I_I7_14_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 14
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[CONV31_I]], i32 1
-; CHECK-NEXT: [[TMP17:%.*]] = lshr <2 x i32> [[TMP16]], <i32 14, i32 15>
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x i32> [[TMP20]], <16 x i32> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SHR_12_I_I]], i32 13
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <16 x i32> [[TMP23]], <16 x i32> [[TMP24]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
-; CHECK-NEXT: [[TMP26:%.*]] = trunc <16 x i32> [[TMP25]] to <16 x i8>
-; CHECK-NEXT: [[TMP27:%.*]] = and <16 x i8> [[TMP26]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[CONV31_I]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i32> [[TMP6]], <i32 14, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> [[TMP9]], <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[SHR_12_I_I]], i32 13
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i32> [[TMP13]], <16 x i32> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
+; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP15]] to <16 x i8>
+; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
-; CHECK-NEXT: store <16 x i8> [[TMP27]], <16 x i8>* [[TMP28]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
+; CHECK-NEXT: store <16 x i8> [[TMP17]], <16 x i8>* [[TMP18]], align 1
; CHECK-NEXT: unreachable
; CHECK: if.end50.i:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
index 0cbff5ef22c12..a5a621b005e0b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -12,15 +12,13 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP4]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds i32, i32* %x, i64 %i
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index 65f02770ab853..d827ea2bc8cb8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -226,11 +226,9 @@ define void @store_splat(float*, float) {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 2
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP0]], i64 3
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP8]], align 4
; CHECK-NEXT: ret void
;
%3 = getelementptr inbounds float, float* %0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
index 561dc23b78782..24aded3bea515 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -44,212 +44,210 @@ define void @n() local_unnamed_addr #0 {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], -183
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
-; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP30]], i32 1
-; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP30]], i32 2
-; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3
-; CHECK-NEXT: [[TMP35:%.*]] = sub <4 x i32> [[TMP34]], [[TMP0]]
-; CHECK-NEXT: [[TMP36:%.*]] = icmp slt <4 x i32> [[TMP35]], zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP35]]
-; CHECK-NEXT: [[TMP38:%.*]] = select <4 x i1> [[TMP36]], <4 x i32> [[TMP37]], <4 x i32> [[TMP35]]
-; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP38]])
-; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP39]], [[B_0]]
-; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP39]], i32 [[B_0]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP32:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
+; CHECK-NEXT: [[TMP33:%.*]] = icmp slt <4 x i32> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP34:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP32]]
+; CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i32> [[TMP34]], <4 x i32> [[TMP32]]
+; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP35]])
+; CHECK-NEXT: [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP36]], [[B_0]]
+; CHECK-NEXT: [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP36]], i32 [[B_0]]
; CHECK-NEXT: [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]]
-; CHECK-NEXT: [[TMP40:%.*]] = icmp slt i32 [[SUB_116]], 0
+; CHECK-NEXT: [[TMP37:%.*]] = icmp slt i32 [[SUB_116]], 0
; CHECK-NEXT: [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]]
-; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[NEG_117]], i32 [[SUB_116]]
-; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP41]], [[OP_EXTRA1]]
-; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP41]], i32 [[OP_EXTRA1]]
+; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[NEG_117]], i32 [[SUB_116]]
+; CHECK-NEXT: [[CMP12_118:%.*]] = icmp slt i32 [[TMP38]], [[OP_EXTRA1]]
+; CHECK-NEXT: [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP38]], i32 [[OP_EXTRA1]]
; CHECK-NEXT: [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
-; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[SUB_1_1]], 0
+; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[SUB_1_1]], 0
; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
-; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
-; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP43]], [[SPEC_SELECT8_120]]
+; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
+; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP40]], [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP43]], i32 [[SPEC_SELECT8_120]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP40]], i32 [[SPEC_SELECT8_120]]
; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
-; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[SUB_2_1]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[SUB_2_1]], 0
; CHECK-NEXT: [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
-; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
-; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP45]], [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
+; CHECK-NEXT: [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP45]], i32 [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP42]], i32 [[SPEC_SELECT8_1_1]]
; CHECK-NEXT: [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
-; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[SUB_3_1]], 0
+; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[SUB_3_1]], 0
; CHECK-NEXT: [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
-; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
-; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
+; CHECK-NEXT: [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
; CHECK-NEXT: [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
-; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP47]], i32 [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP44]], i32 [[SPEC_SELECT8_2_1]]
; CHECK-NEXT: [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
-; CHECK-NEXT: [[TMP48:%.*]] = icmp slt i32 [[SUB_222]], 0
+; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[SUB_222]], 0
; CHECK-NEXT: [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
-; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP48]], i32 [[NEG_223]], i32 [[SUB_222]]
-; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP49]], [[SPEC_SELECT8_3_1]]
-; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP49]], i32 [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_223]], i32 [[SUB_222]]
+; CHECK-NEXT: [[CMP12_224:%.*]] = icmp slt i32 [[TMP46]], [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP46]], i32 [[SPEC_SELECT8_3_1]]
; CHECK-NEXT: [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
-; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_1_2]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_1_2]], 0
; CHECK-NEXT: [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
-; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
-; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_226]]
-; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_226]]
+; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
+; CHECK-NEXT: [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_226]]
+; CHECK-NEXT: [[TMP49:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP48]], i32 [[SPEC_SELECT8_226]]
; CHECK-NEXT: [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
-; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_2_2]], 0
+; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[SUB_2_2]], 0
; CHECK-NEXT: [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
-; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
-; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_1_2]]
-; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_2_2]], [[TMP52]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
+; CHECK-NEXT: [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT: [[TMP52:%.*]] = or i1 [[CMP12_2_2]], [[TMP49]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_1_2]]
; CHECK-NEXT: [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
-; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_3_2]], 0
+; CHECK-NEXT: [[TMP53:%.*]] = icmp slt i32 [[SUB_3_2]], 0
; CHECK-NEXT: [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
-; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
-; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_2_2]]
-; CHECK-NEXT: [[TMP58:%.*]] = or i1 [[CMP12_3_2]], [[TMP55]]
-; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP58]], i32 2, i32 [[SPEC_SELECT_3_1]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP57]], i32 [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
+; CHECK-NEXT: [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT: [[TMP55:%.*]] = or i1 [[CMP12_3_2]], [[TMP52]]
+; CHECK-NEXT: [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP55]], i32 2, i32 [[SPEC_SELECT_3_1]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_2_2]]
; CHECK-NEXT: [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
-; CHECK-NEXT: [[TMP59:%.*]] = icmp slt i32 [[SUB_328]], 0
+; CHECK-NEXT: [[TMP56:%.*]] = icmp slt i32 [[SUB_328]], 0
; CHECK-NEXT: [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
-; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_329]], i32 [[SUB_328]]
-; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_3_2]]
-; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP60]], i32 [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_329]], i32 [[SUB_328]]
+; CHECK-NEXT: [[CMP12_330:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT: [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_2]]
; CHECK-NEXT: [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
-; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_1_3]], 0
+; CHECK-NEXT: [[TMP58:%.*]] = icmp slt i32 [[SUB_1_3]], 0
; CHECK-NEXT: [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
-; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
-; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_332]]
-; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_332]]
+; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
+; CHECK-NEXT: [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_332]]
+; CHECK-NEXT: [[TMP60:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP59]], i32 [[SPEC_SELECT8_332]]
; CHECK-NEXT: [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
-; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_2_3]], 0
+; CHECK-NEXT: [[TMP61:%.*]] = icmp slt i32 [[SUB_2_3]], 0
; CHECK-NEXT: [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
-; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
-; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_1_3]]
-; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_2_3]], [[TMP63]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
+; CHECK-NEXT: [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT: [[TMP63:%.*]] = or i1 [[CMP12_2_3]], [[TMP60]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_1_3]]
; CHECK-NEXT: [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
-; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_3_3]], 0
+; CHECK-NEXT: [[TMP64:%.*]] = icmp slt i32 [[SUB_3_3]], 0
; CHECK-NEXT: [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
-; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
-; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_3]]
-; CHECK-NEXT: [[TMP69:%.*]] = or i1 [[CMP12_3_3]], [[TMP66]]
-; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP69]], i32 3, i32 [[SPEC_SELECT_3_2]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
+; CHECK-NEXT: [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT: [[TMP66:%.*]] = or i1 [[CMP12_3_3]], [[TMP63]]
+; CHECK-NEXT: [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP66]], i32 3, i32 [[SPEC_SELECT_3_2]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_2_3]]
; CHECK-NEXT: [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
-; CHECK-NEXT: [[TMP70:%.*]] = icmp slt i32 [[SUB_4]], 0
+; CHECK-NEXT: [[TMP67:%.*]] = icmp slt i32 [[SUB_4]], 0
; CHECK-NEXT: [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
-; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[NEG_4]], i32 [[SUB_4]]
-; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP71]], [[SPEC_SELECT8_3_3]]
-; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP71]], i32 [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_4]], i32 [[SUB_4]]
+; CHECK-NEXT: [[CMP12_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT: [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_3_3]]
; CHECK-NEXT: [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
-; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_1_4]], 0
+; CHECK-NEXT: [[TMP69:%.*]] = icmp slt i32 [[SUB_1_4]], 0
; CHECK-NEXT: [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
-; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
-; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_4]]
-; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_4]]
+; CHECK-NEXT: [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
+; CHECK-NEXT: [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_4]]
+; CHECK-NEXT: [[TMP71:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP70]], i32 [[SPEC_SELECT8_4]]
; CHECK-NEXT: [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
-; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_2_4]], 0
+; CHECK-NEXT: [[TMP72:%.*]] = icmp slt i32 [[SUB_2_4]], 0
; CHECK-NEXT: [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
-; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
-; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_1_4]]
-; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_2_4]], [[TMP74]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT: [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
+; CHECK-NEXT: [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT: [[TMP74:%.*]] = or i1 [[CMP12_2_4]], [[TMP71]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_1_4]]
; CHECK-NEXT: [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
-; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_3_4]], 0
+; CHECK-NEXT: [[TMP75:%.*]] = icmp slt i32 [[SUB_3_4]], 0
; CHECK-NEXT: [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
-; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
-; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_2_4]]
-; CHECK-NEXT: [[TMP80:%.*]] = or i1 [[CMP12_3_4]], [[TMP77]]
-; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP80]], i32 4, i32 [[SPEC_SELECT_3_3]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP79]], i32 [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
+; CHECK-NEXT: [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT: [[TMP77:%.*]] = or i1 [[CMP12_3_4]], [[TMP74]]
+; CHECK-NEXT: [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP77]], i32 4, i32 [[SPEC_SELECT_3_3]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_2_4]]
; CHECK-NEXT: [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
-; CHECK-NEXT: [[TMP81:%.*]] = icmp slt i32 [[SUB_5]], 0
+; CHECK-NEXT: [[TMP78:%.*]] = icmp slt i32 [[SUB_5]], 0
; CHECK-NEXT: [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
-; CHECK-NEXT: [[TMP82:%.*]] = select i1 [[TMP81]], i32 [[NEG_5]], i32 [[SUB_5]]
-; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP82]], [[SPEC_SELECT8_3_4]]
-; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP82]], i32 [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_5]], i32 [[SUB_5]]
+; CHECK-NEXT: [[CMP12_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_3_4]]
; CHECK-NEXT: [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
-; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_1_5]], 0
+; CHECK-NEXT: [[TMP80:%.*]] = icmp slt i32 [[SUB_1_5]], 0
; CHECK-NEXT: [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
-; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
-; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_5]]
-; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_5]]
+; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
+; CHECK-NEXT: [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_5]]
+; CHECK-NEXT: [[TMP82:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP81]], i32 [[SPEC_SELECT8_5]]
; CHECK-NEXT: [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
-; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_2_5]], 0
+; CHECK-NEXT: [[TMP83:%.*]] = icmp slt i32 [[SUB_2_5]], 0
; CHECK-NEXT: [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
-; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
-; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_1_5]]
-; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_2_5]], [[TMP85]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
+; CHECK-NEXT: [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT: [[TMP85:%.*]] = or i1 [[CMP12_2_5]], [[TMP82]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_1_5]]
; CHECK-NEXT: [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
-; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_3_5]], 0
+; CHECK-NEXT: [[TMP86:%.*]] = icmp slt i32 [[SUB_3_5]], 0
; CHECK-NEXT: [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
-; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
-; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_2_5]]
-; CHECK-NEXT: [[TMP91:%.*]] = or i1 [[CMP12_3_5]], [[TMP88]]
-; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP91]], i32 5, i32 [[SPEC_SELECT_3_4]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP90]], i32 [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
+; CHECK-NEXT: [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT: [[TMP88:%.*]] = or i1 [[CMP12_3_5]], [[TMP85]]
+; CHECK-NEXT: [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP88]], i32 5, i32 [[SPEC_SELECT_3_4]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_2_5]]
; CHECK-NEXT: [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
-; CHECK-NEXT: [[TMP92:%.*]] = icmp slt i32 [[SUB_6]], 0
+; CHECK-NEXT: [[TMP89:%.*]] = icmp slt i32 [[SUB_6]], 0
; CHECK-NEXT: [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
-; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[NEG_6]], i32 [[SUB_6]]
-; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP93]], [[SPEC_SELECT8_3_5]]
-; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP93]], i32 [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_6]], i32 [[SUB_6]]
+; CHECK-NEXT: [[CMP12_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT: [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_5]]
; CHECK-NEXT: [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
-; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_1_6]], 0
+; CHECK-NEXT: [[TMP91:%.*]] = icmp slt i32 [[SUB_1_6]], 0
; CHECK-NEXT: [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
-; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
-; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_6]]
-; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_6]]
+; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
+; CHECK-NEXT: [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_6]]
+; CHECK-NEXT: [[TMP93:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP92]], i32 [[SPEC_SELECT8_6]]
; CHECK-NEXT: [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
-; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_2_6]], 0
+; CHECK-NEXT: [[TMP94:%.*]] = icmp slt i32 [[SUB_2_6]], 0
; CHECK-NEXT: [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
-; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
-; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_1_6]]
-; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_2_6]], [[TMP96]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
+; CHECK-NEXT: [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT: [[TMP96:%.*]] = or i1 [[CMP12_2_6]], [[TMP93]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_1_6]]
; CHECK-NEXT: [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]]
-; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_3_6]], 0
+; CHECK-NEXT: [[TMP97:%.*]] = icmp slt i32 [[SUB_3_6]], 0
; CHECK-NEXT: [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]]
-; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
-; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_2_6]]
-; CHECK-NEXT: [[TMP102:%.*]] = or i1 [[CMP12_3_6]], [[TMP99]]
-; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP102]], i32 6, i32 [[SPEC_SELECT_3_5]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP101]], i32 [[SPEC_SELECT8_2_6]]
+; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
+; CHECK-NEXT: [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]]
+; CHECK-NEXT: [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP96]]
+; CHECK-NEXT: [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]]
; CHECK-NEXT: [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]]
-; CHECK-NEXT: [[TMP103:%.*]] = icmp slt i32 [[SUB_7]], 0
+; CHECK-NEXT: [[TMP100:%.*]] = icmp slt i32 [[SUB_7]], 0
; CHECK-NEXT: [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]]
-; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP103]], i32 [[NEG_7]], i32 [[SUB_7]]
-; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP104]], [[SPEC_SELECT8_3_6]]
-; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP104]], i32 [[SPEC_SELECT8_3_6]]
+; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_7]], i32 [[SUB_7]]
+; CHECK-NEXT: [[CMP12_7:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_3_6]]
+; CHECK-NEXT: [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP101]], i32 [[SPEC_SELECT8_3_6]]
; CHECK-NEXT: [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]]
-; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_1_7]], 0
+; CHECK-NEXT: [[TMP102:%.*]] = icmp slt i32 [[SUB_1_7]], 0
; CHECK-NEXT: [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]]
-; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
-; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_7]]
-; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
-; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_7]]
+; CHECK-NEXT: [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
+; CHECK-NEXT: [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_7]]
+; CHECK-NEXT: [[TMP104:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
+; CHECK-NEXT: [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP103]], i32 [[SPEC_SELECT8_7]]
; CHECK-NEXT: [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]]
-; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_2_7]], 0
+; CHECK-NEXT: [[TMP105:%.*]] = icmp slt i32 [[SUB_2_7]], 0
; CHECK-NEXT: [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]]
-; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
-; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]]
-; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP107]]
-; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT: [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
+; CHECK-NEXT: [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT: [[TMP107:%.*]] = or i1 [[CMP12_2_7]], [[TMP104]]
+; CHECK-NEXT: [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_1_7]]
; CHECK-NEXT: [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]]
-; CHECK-NEXT: [[TMP111:%.*]] = icmp slt i32 [[SUB_3_7]], 0
+; CHECK-NEXT: [[TMP108:%.*]] = icmp slt i32 [[SUB_3_7]], 0
; CHECK-NEXT: [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]]
-; CHECK-NEXT: [[TMP112:%.*]] = select i1 [[TMP111]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
-; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP112]], [[SPEC_SELECT8_2_7]]
-; CHECK-NEXT: [[TMP113:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]]
-; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP113]], i32 7, i32 [[SPEC_SELECT_3_6]]
-; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP112]], i32 [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT: [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
+; CHECK-NEXT: [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT: [[TMP110:%.*]] = or i1 [[CMP12_3_7]], [[TMP107]]
+; CHECK-NEXT: [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP110]], i32 7, i32 [[SPEC_SELECT_3_6]]
+; CHECK-NEXT: [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_2_7]]
; CHECK-NEXT: [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
index 6ebc67a90fe86..33ba97921e878 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
@@ -137,53 +137,23 @@ define void @phi_float32(half %hval, float %fval) {
; MAX256-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float
; MAX256-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float
; MAX256-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
-; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1
-; MAX256-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2
-; MAX256-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3
-; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4
-; MAX256-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5
-; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6
-; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7
-; MAX256-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
-; MAX256-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1
-; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2
-; MAX256-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3
-; MAX256-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4
-; MAX256-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[FVAL]], i32 5
-; MAX256-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6
-; MAX256-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7
-; MAX256-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]]
-; MAX256-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]]
-; MAX256-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
-; MAX256-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1
-; MAX256-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2
-; MAX256-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3
-; MAX256-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4
-; MAX256-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5
-; MAX256-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6
-; MAX256-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7
-; MAX256-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]]
-; MAX256-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]]
-; MAX256-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
-; MAX256-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1
-; MAX256-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2
-; MAX256-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3
-; MAX256-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4
-; MAX256-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5
-; MAX256-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6
-; MAX256-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7
-; MAX256-NEXT: [[TMP36:%.*]] = fmul <8 x float> [[TMP35]], [[TMP15]]
-; MAX256-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]]
-; MAX256-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
-; MAX256-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1
-; MAX256-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2
-; MAX256-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3
-; MAX256-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4
-; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5
-; MAX256-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6
-; MAX256-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7
-; MAX256-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]]
-; MAX256-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]]
+; MAX256-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
+; MAX256-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
+; MAX256-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
+; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
+; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
+; MAX256-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
+; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
+; MAX256-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
+; MAX256-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
+; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
+; MAX256-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
+; MAX256-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [
; MAX256-NEXT: i32 0, label [[BB2:%.*]]
; MAX256-NEXT: i32 1, label [[BB3:%.*]]
@@ -196,12 +166,12 @@ define void @phi_float32(half %hval, float %fval) {
; MAX256: bb5:
; MAX256-NEXT: br label [[BB2]]
; MAX256: bb2:
-; MAX256-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ]
-; MAX256-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ]
-; MAX256-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ]
-; MAX256-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ]
-; MAX256-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7
-; MAX256-NEXT: store float [[TMP52]], float* undef, align 4
+; MAX256-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
+; MAX256-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
+; MAX256-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
+; MAX256-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
+; MAX256-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
+; MAX256-NEXT: store float [[TMP17]], float* undef, align 4
; MAX256-NEXT: ret void
;
; MAX1024-LABEL: @phi_float32(
@@ -213,53 +183,23 @@ define void @phi_float32(half %hval, float %fval) {
; MAX1024-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float
; MAX1024-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float
; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
-; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1
-; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2
-; MAX1024-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3
-; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4
-; MAX1024-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5
-; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6
-; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7
-; MAX1024-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
-; MAX1024-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1
-; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2
-; MAX1024-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3
-; MAX1024-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4
-; MAX1024-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[FVAL]], i32 5
-; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6
-; MAX1024-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7
-; MAX1024-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]]
-; MAX1024-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]]
-; MAX1024-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
-; MAX1024-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1
-; MAX1024-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2
-; MAX1024-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3
-; MAX1024-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4
-; MAX1024-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5
-; MAX1024-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6
-; MAX1024-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7
-; MAX1024-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]]
-; MAX1024-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]]
-; MAX1024-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
-; MAX1024-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1
-; MAX1024-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2
-; MAX1024-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3
-; MAX1024-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4
-; MAX1024-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5
-; MAX1024-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6
-; MAX1024-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7
-; MAX1024-NEXT: [[TMP36:%.*]] = fmul <8 x float> [[TMP35]], [[TMP15]]
-; MAX1024-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]]
-; MAX1024-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
-; MAX1024-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1
-; MAX1024-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2
-; MAX1024-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3
-; MAX1024-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4
-; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5
-; MAX1024-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6
-; MAX1024-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7
-; MAX1024-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]]
-; MAX1024-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]]
+; MAX1024-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
+; MAX1024-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX1024-NEXT: [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
+; MAX1024-NEXT: [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
+; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
+; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX1024-NEXT: [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
+; MAX1024-NEXT: [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
+; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
+; MAX1024-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX1024-NEXT: [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
+; MAX1024-NEXT: [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
+; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
+; MAX1024-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX1024-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
+; MAX1024-NEXT: [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [
; MAX1024-NEXT: i32 0, label [[BB2:%.*]]
; MAX1024-NEXT: i32 1, label [[BB3:%.*]]
@@ -272,12 +212,12 @@ define void @phi_float32(half %hval, float %fval) {
; MAX1024: bb5:
; MAX1024-NEXT: br label [[BB2]]
; MAX1024: bb2:
-; MAX1024-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ]
-; MAX1024-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ]
-; MAX1024-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ]
-; MAX1024-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ]
-; MAX1024-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7
-; MAX1024-NEXT: store float [[TMP52]], float* undef, align 4
+; MAX1024-NEXT: [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
+; MAX1024-NEXT: [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
+; MAX1024-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
+; MAX1024-NEXT: [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
+; MAX1024-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
+; MAX1024-NEXT: store float [[TMP17]], float* undef, align 4
; MAX1024-NEXT: ret void
;
bb:
More information about the llvm-commits
mailing list