[llvm] fc9c59c - [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri May 20 06:00:24 PDT 2022
Author: Alexey Bataev
Date: 2022-05-20T05:58:09-07:00
New Revision: fc9c59c355cb255446e571b4515b5e41a76503c4
URL: https://github.com/llvm/llvm-project/commit/fc9c59c355cb255446e571b4515b5e41a76503c4
DIFF: https://github.com/llvm/llvm-project/commit/fc9c59c355cb255446e571b4515b5e41a76503c4.diff
LOG: [SLP]Do not emit extract elements for insertelements users, replace with shuffles directly.
SLP vectorizer emits extracts for externally used vectorized scalars and
estimates the cost for each such extract. But in many cases these
scalars are input for insertelement instructions, forming buildvector,
and instead of extractelement/insertelement pair we can emit/cost
estimate shuffle(s) cost and generate series of shuffles, which can be
further optimized.
Tested using test-suite (+SPEC2017), the tests passed, SLP was able to
generate/vectorize more instructions in many cases and it allowed to reduce
number of re-vectorization attempts (where we could try to vectorize
buildector insertelements again and again).
Differential Revision: https://reviews.llvm.org/D107966
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
llvm/test/Transforms/SLPVectorizer/X86/phi.ll
llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e366044353b1f..ca9345f5233e4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6668,6 +6668,23 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
llvm_unreachable("Two
diff erent buildvectors not expected.");
}
+namespace {
+/// Returns incoming Value *, if the requested type is Value * too, or a default
+/// value, otherwise.
+struct ValueSelect {
+ template <typename U>
+ static typename std::enable_if<std::is_same<Value *, U>::value, Value *>::type
+ get(Value *V) {
+ return V;
+ }
+ template <typename U>
+ static typename std::enable_if<!std::is_same<Value *, U>::value, U>::type
+ get(Value *) {
+ return U();
+ }
+};
+} // namespace
+
/// Does the analysis of the provided shuffle masks and performs the requested
/// actions on the vectors with the given shuffle masks. It tries to do it in
/// several steps.
@@ -6700,6 +6717,9 @@ static T *performExtractsShuffleAction(
else
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
}
+ auto *V = ValueSelect::get<T *>(Base);
+ assert((!V || GetVF(V) == Mask.size()) &&
+ "Expected base vector of VF number of elements.");
Prev = Action(Mask, {nullptr, Res.first});
} else if (ShuffleMask.size() == 1) {
// Base is undef and only 1 vector is shuffled - perform the action only for
@@ -8105,6 +8125,17 @@ Value *BoUpSLP::vectorizeTree() {
return vectorizeTree(ExternallyUsedValues);
}
+namespace {
+/// Data type for handling buildvector sequences with the reused scalars from
+/// other tree entries.
+struct ShuffledInsertData {
+ /// List of insertelements to be replaced by shuffles.
+ SmallVector<InsertElementInst *> InsertElements;
+ /// The parent vectors and shuffle mask for the given list of inserts.
+ MapVector<Value *, SmallVector<int>> ValueMasks;
+};
+} // namespace
+
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
@@ -8138,6 +8169,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
+ SmallVector<ShuffledInsertData> ShuffledInserts;
+ // Maps vector instruction to original insertelement instruction
+ DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -8177,6 +8211,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
"In-tree scalar of vector type is not insertelement?");
+ auto *IE = cast<InsertElementInst>(Scalar);
+ VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
// If User == nullptr, the Scalar is used as extra arg. Generate
@@ -8205,6 +8241,64 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
continue;
}
+ if (auto *VU = dyn_cast<InsertElementInst>(User)) {
+ if (!Scalar->getType()->isVectorTy()) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
+ Optional<unsigned> InsertIdx = getInsertIndex(VU);
+ if (InsertIdx) {
+ auto *It =
+ find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
+ // Checks if 2 insertelements are from the same buildvector.
+ InsertElementInst *VecInsert = Data.InsertElements.front();
+ return areTwoInsertFromSameBuildVector(VU, VecInsert);
+ });
+ unsigned Idx = *InsertIdx;
+ if (It == ShuffledInserts.end()) {
+ (void)ShuffledInserts.emplace_back();
+ It = std::next(ShuffledInserts.begin(),
+ ShuffledInserts.size() - 1);
+ SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ // Find the insertvector, vectorized in tree, if any.
+ Value *Base = VU;
+ while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
+ if (IEBase != User &&
+ (!IEBase->hasOneUse() ||
+ getInsertIndex(IEBase).getValueOr(Idx) == Idx))
+ break;
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(IEBase)) {
+ do {
+ IEBase = cast<InsertElementInst>(Base);
+ int IEIdx = *getInsertIndex(IEBase);
+ assert(Mask[Idx] == UndefMaskElem &&
+ "InsertElementInstruction used already.");
+ Mask[IEIdx] = IEIdx;
+ Base = IEBase->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ // After the vectorization the def-use chain has changed, need
+ // to look through original insertelement instructions, if they
+ // get replaced by vector instructions.
+ auto It = VectorToInsertElement.find(Base);
+ if (It != VectorToInsertElement.end())
+ Base = It->second;
+ }
+ }
+ SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
+ if (Mask.empty())
+ Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask[Idx] = ExternalUse.Lane;
+ It->InsertElements.push_back(cast<InsertElementInst>(User));
+ continue;
+ }
+ }
+ }
+ }
+
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
@@ -8240,6 +8334,219 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
+ // Checks if the mask is an identity mask.
+ auto &&IsIdentityMask = [](ArrayRef<int> Mask, FixedVectorType *VecTy) {
+ int Limit = Mask.size();
+ return VecTy->getNumElements() == Mask.size() &&
+ all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
+ ShuffleVectorInst::isIdentityMask(Mask);
+ };
+ // Tries to combine 2
diff erent masks into single one.
+ auto &&CombineMasks = [](SmallVectorImpl<int> &Mask, ArrayRef<int> ExtMask) {
+ SmallVector<int> NewMask(ExtMask.size(), UndefMaskElem);
+ for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
+ if (ExtMask[I] == UndefMaskElem)
+ continue;
+ NewMask[I] = Mask[ExtMask[I]];
+ }
+ Mask.swap(NewMask);
+ };
+ // Peek through shuffles, trying to simplify the final shuffle code.
+ auto &&PeekThroughShuffles =
+ [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl<int> &Mask,
+ bool CheckForLengthChange = false) {
+ while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
+ // Exit if not a fixed vector type or changing size shuffle.
+ if (!isa<FixedVectorType>(SV->getType()) ||
+ (CheckForLengthChange && SV->changesLength()))
+ break;
+ // Exit if the identity or broadcast mask is found.
+ if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
+ SV->isZeroEltSplat())
+ break;
+ bool IsOp1Undef = isUndefVector(SV->getOperand(0));
+ bool IsOp2Undef = isUndefVector(SV->getOperand(1));
+ if (!IsOp1Undef && !IsOp2Undef)
+ break;
+ SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
+ SV->getShuffleMask().end());
+ CombineMasks(ShuffleMask, Mask);
+ Mask.swap(ShuffleMask);
+ if (IsOp2Undef)
+ V = SV->getOperand(0);
+ else
+ V = SV->getOperand(1);
+ }
+ };
+ // Smart shuffle instruction emission, walks through shuffles trees and
+ // tries to find the best matching vector for the actual shuffle
+ // instruction.
+ auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles,
+ &CombineMasks](Value *V1, Value *V2,
+ ArrayRef<int> Mask) -> Value * {
+ assert(V1 && "Expected at least one vector value.");
+ if (V2 && !isUndefVector(V2)) {
+ // Peek through shuffles.
+ Value *Op1 = V1;
+ Value *Op2 = V2;
+ int VF =
+ cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
+ SmallVector<int> CombinedMask1(Mask.size(), UndefMaskElem);
+ SmallVector<int> CombinedMask2(Mask.size(), UndefMaskElem);
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (Mask[I] < VF)
+ CombinedMask1[I] = Mask[I];
+ else
+ CombinedMask2[I] = Mask[I] - VF;
+ }
+ Value *PrevOp1;
+ Value *PrevOp2;
+ do {
+ PrevOp1 = Op1;
+ PrevOp2 = Op2;
+ PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true);
+ PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true);
+ // Check if we have 2 resizing shuffles - need to peek through operands
+ // again.
+ if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
+ if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2))
+ if (SV1->getOperand(0)->getType() ==
+ SV2->getOperand(0)->getType() &&
+ SV1->getOperand(0)->getType() != SV1->getType() &&
+ isUndefVector(SV1->getOperand(1)) &&
+ isUndefVector(SV2->getOperand(1))) {
+ Op1 = SV1->getOperand(0);
+ Op2 = SV2->getOperand(0);
+ SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
+ SV1->getShuffleMask().end());
+ CombineMasks(ShuffleMask1, CombinedMask1);
+ CombinedMask1.swap(ShuffleMask1);
+ SmallVector<int> ShuffleMask2(SV2->getShuffleMask().begin(),
+ SV2->getShuffleMask().end());
+ CombineMasks(ShuffleMask2, CombinedMask2);
+ CombinedMask2.swap(ShuffleMask2);
+ }
+ } while (PrevOp1 != Op1 || PrevOp2 != Op2);
+ VF = cast<VectorType>(Op1->getType())
+ ->getElementCount()
+ .getKnownMinValue();
+ for (int I = 0, E = Mask.size(); I < E; ++I) {
+ if (CombinedMask2[I] != UndefMaskElem) {
+ assert(CombinedMask1[I] == UndefMaskElem &&
+ "Expected undefined mask element");
+ CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
+ }
+ }
+ Value *Vec = Builder.CreateShuffleVector(
+ Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
+ CombinedMask1);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
+ }
+ if (isa<PoisonValue>(V1))
+ return PoisonValue::get(FixedVectorType::get(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size()));
+ Value *Op = V1;
+ SmallVector<int> CombinedMask(Mask.begin(), Mask.end());
+ PeekThroughShuffles(Op, CombinedMask);
+ if (!isa<FixedVectorType>(Op->getType()) ||
+ !IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
+ Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
+ }
+ return Op;
+ };
+
+ auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask) {
+ unsigned VF = Mask.size();
+ unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
+ if (VF != VecVF) {
+ if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); })) {
+ Vec = CreateShuffle(Vec, nullptr, Mask);
+ return std::make_pair(Vec, true);
+ }
+ SmallVector<int> ResizeMask(VF, UndefMaskElem);
+ for (unsigned I = 0; I < VF; ++I) {
+ if (Mask[I] != UndefMaskElem)
+ ResizeMask[Mask[I]] = Mask[I];
+ }
+ Vec = CreateShuffle(Vec, nullptr, ResizeMask);
+ }
+
+ return std::make_pair(Vec, false);
+ };
+ // Perform shuffling of the vectorize tree entries for better handling of
+ // external extracts.
+ for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) {
+ // Find the first and the last instruction in the list of insertelements.
+ sort(ShuffledInserts[I].InsertElements, isFirstInsertElement);
+ InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front();
+ InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back();
+ Builder.SetInsertPoint(LastInsert);
+ auto Vector = ShuffledInserts[I].ValueMasks.takeVector();
+ Value *NewInst = performExtractsShuffleAction<Value>(
+ makeMutableArrayRef(Vector.data(), Vector.size()),
+ FirstInsert->getOperand(0),
+ [](Value *Vec) {
+ return cast<VectorType>(Vec->getType())
+ ->getElementCount()
+ .getKnownMinValue();
+ },
+ ResizeToVF,
+ [FirstInsert, &CreateShuffle](ArrayRef<int> Mask,
+ ArrayRef<Value *> Vals) {
+ assert((Vals.size() == 1 || Vals.size() == 2) &&
+ "Expected exactly 1 or 2 input values.");
+ if (Vals.size() == 1) {
+ // Do not create shuffle if the mask is a simple identity
+ // non-resizing mask.
+ if (Mask.size() != cast<FixedVectorType>(Vals.front()->getType())
+ ->getNumElements() ||
+ !ShuffleVectorInst::isIdentityMask(Mask))
+ return CreateShuffle(Vals.front(), nullptr, Mask);
+ return Vals.front();
+ }
+ return CreateShuffle(Vals.front() ? Vals.front()
+ : FirstInsert->getOperand(0),
+ Vals.back(), Mask);
+ });
+ auto It = ShuffledInserts[I].InsertElements.rbegin();
+ // Rebuild buildvector chain.
+ InsertElementInst *II = nullptr;
+ if (It != ShuffledInserts[I].InsertElements.rend())
+ II = *It;
+ SmallVector<Instruction *> Inserts;
+ while (It != ShuffledInserts[I].InsertElements.rend()) {
+ assert(II && "Must be an insertelement instruction.");
+ if (*It == II)
+ ++It;
+ else
+ Inserts.push_back(cast<Instruction>(II));
+ II = dyn_cast<InsertElementInst>(II->getOperand(0));
+ }
+ for (Instruction *II : reverse(Inserts)) {
+ II->replaceUsesOfWith(II->getOperand(0), NewInst);
+ if (auto *NewI = dyn_cast<Instruction>(NewInst))
+ if (II->getParent() == NewI->getParent() && II->comesBefore(NewI))
+ II->moveAfter(NewI);
+ NewInst = II;
+ }
+ LastInsert->replaceAllUsesWith(NewInst);
+ for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) {
+ IE->replaceUsesOfWith(IE->getOperand(1),
+ PoisonValue::get(IE->getOperand(1)->getType()));
+ eraseInstruction(IE);
+ }
+ CSEBlocks.insert(LastInsert->getParent());
+ }
+
// For each vectorized value:
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5dc03ab1a3cd1..68c8555bd6d43 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -1312,19 +1312,19 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1,
; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]]
; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 6, i32 2, i32 10, i32 14, i32 5, i32 1, i32 9, i32 13, i32 4, i32 0, i32 8, i32 12>
-; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 7, i32 3, i32 11, i32 15, i32 4, i32 0, i32 8, i32 12, i32 5, i32 1, i32 9, i32 13>
+; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> <i32 5, i32 4, i32 6, i32 7, i32 1, i32 0, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]]
; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 1, i32 0, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]]
; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]]
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
+; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]]
; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]]
; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
-; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> <i32 2, i32 19, i32 0, i32 17, i32 23, i32 6, i32 5, i32 20, i32 27, i32 10, i32 9, i32 24, i32 31, i32 14, i32 13, i32 28>
+; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]]
; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]]
; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index 12a2cd2a6834f..148169fb64de4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index 03a29f48a739e..f5079fab51a6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
index aab848ba73ddb..e25fd9b206334 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -26,19 +26,15 @@ define void @s116_modified(float* %a) {
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3
-; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4
; CHECK-NEXT: ret void
;
%gep0 = getelementptr inbounds float, float* %a, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 77174151e9635..281169f7a69fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -10,12 +10,9 @@ define void @test() {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
; CHECK-NEXT: ret void
;
@@ -46,14 +43,10 @@ define void @test1() {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
-; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
-; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1
-; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1
-; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds float, ptr undef, i32 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index 3b0ec888de661..610049129d398 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -4,13 +4,12 @@
define void @b() {
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer
-; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0x7FF8000000000000, float undef, float undef, float undef>, <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0x7FF8000000000000, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float undef, float undef, float undef, float 2.000000e+00>
+; CHECK-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT: store <4 x float> [[TMP4]], ptr undef, align 4
; CHECK-NEXT: ret void
;
entry:
@@ -50,8 +49,7 @@ define void @test(float %a) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -71,7 +69,6 @@ define internal void @test1() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[DOTSROA_025_4_VEC_INSERT_US_I:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
index 4463f94b1c557..36347b4843fe7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -52,16 +52,12 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) {
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
-; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
-; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
-; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3
-; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1
-; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0
-; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
+; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
index c9590ae76f405..402211b4ef73e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -13,21 +13,19 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.
; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4
; CHECK-NEXT: br label [[DO_BODY66_I:%.*]]
; CHECK: do.body66.i:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> <i32 undef, i32 poison>, i32 [[TMP2]], i32 1
; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]]
; CHECK: if.else.i:
-; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef
+; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef
; CHECK-NEXT: br label [[DO_COND_I]]
; CHECK: do.cond.i:
-; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ]
+; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ]
; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]]
; CHECK: do.end1006.i:
-; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 30abc6f9fbe87..9b183969001c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -36,17 +36,15 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.
; CHECK: if.then325:
; CHECK-NEXT: br label [[IF_END327]]
; CHECK: if.end327:
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[TMP4]], i32 0
; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]]
; CHECK: if.then329:
; CHECK-NEXT: br label [[IF_END332]]
; CHECK: if.end332:
-; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>*
+; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
index ef1ed33ffcb99..0344c5a82fbed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -32,7 +32,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
index 94739340c8b5a..0ab2cf94db827 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -35,7 +35,7 @@ define void @exceed(double %0, double %1) {
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]]
; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> <i32 3, i32 1>
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef
; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [
; CHECK-NEXT: i32 0, label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
index fa8c8a14e891c..e87a5190d4721 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -129,11 +129,10 @@ define fastcc void @dct36(double* %inbuf) {
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index bbd71825f96c6..1c7cd3fd265d5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
-; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index cd05b940be221..6b9fdb23714fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -20,18 +20,15 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
-; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0
-; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index 723c12d9b05b6..9bfde3976d2cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -115,16 +115,15 @@ define void @_Z8radianceRK3RayiPt() #0 {
; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double undef, double poison>, double undef, i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef
+; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.then78:
; CHECK-NEXT: br label [[RETURN]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
index 4c15a89f8f005..ea4de7916097d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
@@ -82,23 +82,19 @@ define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
-; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
-; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
-; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
-; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
-; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
-; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
+; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
+; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
-; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
-; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
-; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
-; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
-; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
-; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
+; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
+; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index f446f3682e580..bafc67c9ea31f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -896,12 +896,11 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0
; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float>
-; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
-; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+00>, float [[TMP6]], i32 0
-; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]]
-; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
-; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
-; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]]
+; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> <float poison, float 3.000000e+00>, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
+; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]]
+; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
+; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
+; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]]
; THRESHOLD-NEXT: ret float [[OP_RDX3]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index 7c4aed1453c97..801b4863e7c19 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1100,15 +1100,14 @@ define i32 @maxi8_wrong_parent(i32) {
; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0
; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1
-; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
-; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1
-; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
-; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1
-; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]]
-; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
-; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
-; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
-; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]]
+; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 2>
+; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
+; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]]
+; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
+; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
+; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
+; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]]
; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]]
; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]]
; THRESH-NEXT: ret i32 [[OP_RDX5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
index 470fa0caa1cd1..01979d12fb3ac 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -439,12 +439,9 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @reschedule_extract(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -477,12 +474,9 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @take_credit(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -539,12 +533,9 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @_vadd256(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> <i32 7, i32 15>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index d7adf196f7c2a..06cdff548a91f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -474,12 +474,9 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @reschedule_extract(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -512,12 +509,9 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @take_credit(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -574,12 +568,9 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @_vadd256(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
-; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
-; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
-; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> <i32 7, i32 15>
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
index 5a9273d1e5b3e..b64c0e4e35a23 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
@@ -19,16 +19,10 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) {
; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
-; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
-; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
-; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1
-; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0
-; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0
+; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index d785f8e76798b..011ce7352079c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -9,10 +9,9 @@ define i32 @fn1() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, i32 [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
; CHECK-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
index 31c5d5b07f0e0..85aa8aed5f5a5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
@@ -58,18 +58,11 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
-; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4
; CHECK-NEXT: ret i32 undef
;
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
index 6bbde0957743c..78572209687b8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
@@ -39,14 +39,13 @@ define dso_local void @j() local_unnamed_addr {
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
; CHECK-NEXT: store float [[TMP15]], float* @f, align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float poison, float -1.000000e+00, float poison, float -1.000000e+00>, float [[CONV19]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2
-; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP11]], [[TMP18]]
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32>
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32>
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
index 3d7d632cca3d6..6beff0162ffd2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3
-; CHECK-NEXT: ret <4 x float> [[I3]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
index 509c37f5da28c..dd08e7983d70c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -59,9 +59,8 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3
-; CHECK-NEXT: ret <4 x float> [[I3]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 36b867d4a148f..faf86fbcafb9d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -722,15 +722,11 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub
; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
-; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
-; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
-; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
-; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
-; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
-; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
-; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
+; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
+; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
+; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]]
; SSE-NEXT: ret double [[ADD3]]
;
; AVX-LABEL: @splat_loads(
@@ -791,17 +787,13 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2,
; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
-; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
-; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
-; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
-; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
-; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
-; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]]
-; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
-; SSE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
-; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]]
+; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
+; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
+; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]]
+; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
+; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
+; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]]
; SSE-NEXT: ret double [[RES]]
;
; AVX-LABEL: @splat_loads_with_internal_uses(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
index 464288ef340c8..fc18281c3f35e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
@@ -29,10 +29,9 @@ define void @f(i1 %x) #0 {
; CHECK: if.then:
; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]]
-; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index df767259b4f47..c6725da8a9869 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -151,7 +151,7 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00
@@ -163,25 +163,22 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
-; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 undef, i32 2, i32 3>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
+; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0
-; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0
+; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]]
; CHECK-NEXT: ret float [[ADD31]]
;
entry:
@@ -244,27 +241,20 @@ define float @sort_phi_type(float* nocapture readonly %A) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3
-; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]]
; CHECK-NEXT: ret float [[ADD31]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
index 8ab5fb7c7ceec..595b5c9d1079b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
@@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
-; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
-; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1
-; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0
-; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
;
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
index 3177f8557315d..e97a3a4521482 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
@@ -10,16 +10,10 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
-; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
-; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
-; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1
-; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0
-; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
;
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
index 1fd2f30bf93cc..aa93abaf4d9cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
@@ -10,18 +10,10 @@ define i64 @test() {
; CHECK: bb3:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]])
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64
; CHECK-NEXT: ret i64 [[TMP65]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index 7ab7f3b67634b..f71e214fe8938 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -9,12 +9,10 @@ define void @fextr(i16* %ptr) {
; CHECK-NEXT: br label [[T:%.*]]
; CHECK: t:
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT: ret void
;
; YAML: Pass: slp-vectorizer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index 3315e71ae2aa6..fc1a673d818f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -12,22 +12,15 @@ define void @test() {
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]]
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3
-; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]]
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds i8, i8* undef, i64 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
index 9bbe2d5d673c7..bb0a39b7b6fb8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
@@ -7,21 +7,18 @@ define i32 @test(i32* %isec) {
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]]
; CHECK: block1:
; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]]
; CHECK: block2:
; CHECK-NEXT: br label [[BLOCK3]]
; CHECK: block3:
-; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]]
-; CHECK-NEXT: ret i32 [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
%arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
index 297aa493ed071..0a07e03bac72c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
@@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) {
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
-; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
-; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5
-; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
-; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
index b0a406e6a8f51..3cf139ea0469b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
@@ -49,15 +49,9 @@ define void @test(i32* nocapture %t2) {
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
-; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
-; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5
-; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
-; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
+; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
index 109c27e4f4f4e..f2fbf1dfc756a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -12,7 +12,7 @@ define void @foo() {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]]
; CHECK: bb4:
@@ -23,17 +23,13 @@ define void @foo() {
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]]
-; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float>
-; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
+; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
; CHECK-NEXT: br label [[BB2]]
;
entry:
More information about the llvm-commits
mailing list