[llvm] aeb1981 - Revert "[SLP]Do not emit extract elements for insertelements users, replace with shuffles directly."
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat May 21 13:01:15 PDT 2022
Author: Florian Hahn
Date: 2022-05-21T21:00:01+01:00
New Revision: aeb19817d66f1a15754163c7f48e01e9ebdd6d45
URL: https://github.com/llvm/llvm-project/commit/aeb19817d66f1a15754163c7f48e01e9ebdd6d45
DIFF: https://github.com/llvm/llvm-project/commit/aeb19817d66f1a15754163c7f48e01e9ebdd6d45.diff
LOG: Revert "[SLP]Do not emit extract elements for insertelements users, replace with shuffles directly."
This reverts commit fc9c59c355cb255446e571b4515b5e41a76503c4.
The patch triggers an assertion when building SPEC on X86. Reduced
reproducer shared at D107966.
Also reverts follow-up commit 11a09af76d11ad5a9f1f95b561112af17ff81f80.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
llvm/test/Transforms/SLPVectorizer/X86/phi.ll
llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ab556d699f163..e366044353b1f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6668,23 +6668,6 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
llvm_unreachable("Two
diff erent buildvectors not expected.");
}
-namespace {
-/// Returns incoming Value *, if the requested type is Value * too, or a default
-/// value, otherwise.
-struct ValueSelect {
- template <typename U>
- static typename std::enable_if<std::is_same<Value *, U>::value, Value *>::type
- get(Value *V) {
- return V;
- }
- template <typename U>
- static typename std::enable_if<!std::is_same<Value *, U>::value, U>::type
- get(Value *) {
- return U();
- }
-};
-} // namespace
-
/// Does the analysis of the provided shuffle masks and performs the requested
/// actions on the vectors with the given shuffle masks. It tries to do it in
/// several steps.
@@ -6717,10 +6700,6 @@ static T *performExtractsShuffleAction(
else
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
}
- auto *V = ValueSelect::get<T *>(Base);
- (void)V;
- assert((!V || GetVF(V) == Mask.size()) &&
- "Expected base vector of VF number of elements.");
Prev = Action(Mask, {nullptr, Res.first});
} else if (ShuffleMask.size() == 1) {
// Base is undef and only 1 vector is shuffled - perform the action only for
@@ -8126,17 +8105,6 @@ Value *BoUpSLP::vectorizeTree() {
return vectorizeTree(ExternallyUsedValues);
}
-namespace {
-/// Data type for handling buildvector sequences with the reused scalars from
-/// other tree entries.
-struct ShuffledInsertData {
- /// List of insertelements to be replaced by shuffles.
- SmallVector<InsertElementInst *> InsertElements;
- /// The parent vectors and shuffle mask for the given list of inserts.
- MapVector<Value *, SmallVector<int>> ValueMasks;
-};
-} // namespace
-
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
@@ -8170,9 +8138,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
- SmallVector<ShuffledInsertData> ShuffledInserts;
- // Maps vector instruction to original insertelement instruction
- DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -8212,8 +8177,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
"In-tree scalar of vector type is not insertelement?");
- auto *IE = cast<InsertElementInst>(Scalar);
- VectorToInsertElement.try_emplace(Vec, IE);
return Vec;
};
// If User == nullptr, the Scalar is used as extra arg. Generate
@@ -8242,64 +8205,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
continue;
}
- if (auto *VU = dyn_cast<InsertElementInst>(User)) {
- if (!Scalar->getType()->isVectorTy()) {
- if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
- Optional<unsigned> InsertIdx = getInsertIndex(VU);
- if (InsertIdx) {
- auto *It =
- find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
- // Checks if 2 insertelements are from the same buildvector.
- InsertElementInst *VecInsert = Data.InsertElements.front();
- return areTwoInsertFromSameBuildVector(VU, VecInsert);
- });
- unsigned Idx = *InsertIdx;
- if (It == ShuffledInserts.end()) {
- (void)ShuffledInserts.emplace_back();
- It = std::next(ShuffledInserts.begin(),
- ShuffledInserts.size() - 1);
- SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
- if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
- // Find the insertvector, vectorized in tree, if any.
- Value *Base = VU;
- while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
- if (IEBase != User &&
- (!IEBase->hasOneUse() ||
- getInsertIndex(IEBase).getValueOr(Idx) == Idx))
- break;
- // Build the mask for the vectorized insertelement instructions.
- if (const TreeEntry *E = getTreeEntry(IEBase)) {
- do {
- IEBase = cast<InsertElementInst>(Base);
- int IEIdx = *getInsertIndex(IEBase);
- assert(Mask[Idx] == UndefMaskElem &&
- "InsertElementInstruction used already.");
- Mask[IEIdx] = IEIdx;
- Base = IEBase->getOperand(0);
- } while (E == getTreeEntry(Base));
- break;
- }
- Base = cast<InsertElementInst>(Base)->getOperand(0);
- // After the vectorization the def-use chain has changed, need
- // to look through original insertelement instructions, if they
- // get replaced by vector instructions.
- auto It = VectorToInsertElement.find(Base);
- if (It != VectorToInsertElement.end())
- Base = It->second;
- }
- }
- SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
- if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
- Mask[Idx] = ExternalUse.Lane;
- It->InsertElements.push_back(cast<InsertElementInst>(User));
- continue;
- }
- }
- }
- }
-
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
@@ -8335,219 +8240,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
- // Checks if the mask is an identity mask.
- auto &&IsIdentityMask = [](ArrayRef<int> Mask, FixedVectorType *VecTy) {
- int Limit = Mask.size();
- return VecTy->getNumElements() == Mask.size() &&
- all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(Mask);
- };
- // Tries to combine 2
diff erent masks into single one.
- auto &&CombineMasks = [](SmallVectorImpl<int> &Mask, ArrayRef<int> ExtMask) {
- SmallVector<int> NewMask(ExtMask.size(), UndefMaskElem);
- for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
- if (ExtMask[I] == UndefMaskElem)
- continue;
- NewMask[I] = Mask[ExtMask[I]];
- }
- Mask.swap(NewMask);
- };
- // Peek through shuffles, trying to simplify the final shuffle code.
- auto &&PeekThroughShuffles =
- [&IsIdentityMask, &CombineMasks](Value *&V, SmallVectorImpl<int> &Mask,
- bool CheckForLengthChange = false) {
- while (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
- // Exit if not a fixed vector type or changing size shuffle.
- if (!isa<FixedVectorType>(SV->getType()) ||
- (CheckForLengthChange && SV->changesLength()))
- break;
- // Exit if the identity or broadcast mask is found.
- if (IsIdentityMask(Mask, cast<FixedVectorType>(SV->getType())) ||
- SV->isZeroEltSplat())
- break;
- bool IsOp1Undef = isUndefVector(SV->getOperand(0));
- bool IsOp2Undef = isUndefVector(SV->getOperand(1));
- if (!IsOp1Undef && !IsOp2Undef)
- break;
- SmallVector<int> ShuffleMask(SV->getShuffleMask().begin(),
- SV->getShuffleMask().end());
- CombineMasks(ShuffleMask, Mask);
- Mask.swap(ShuffleMask);
- if (IsOp2Undef)
- V = SV->getOperand(0);
- else
- V = SV->getOperand(1);
- }
- };
- // Smart shuffle instruction emission, walks through shuffles trees and
- // tries to find the best matching vector for the actual shuffle
- // instruction.
- auto &&CreateShuffle = [this, &IsIdentityMask, &PeekThroughShuffles,
- &CombineMasks](Value *V1, Value *V2,
- ArrayRef<int> Mask) -> Value * {
- assert(V1 && "Expected at least one vector value.");
- if (V2 && !isUndefVector(V2)) {
- // Peek through shuffles.
- Value *Op1 = V1;
- Value *Op2 = V2;
- int VF =
- cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
- SmallVector<int> CombinedMask1(Mask.size(), UndefMaskElem);
- SmallVector<int> CombinedMask2(Mask.size(), UndefMaskElem);
- for (int I = 0, E = Mask.size(); I < E; ++I) {
- if (Mask[I] < VF)
- CombinedMask1[I] = Mask[I];
- else
- CombinedMask2[I] = Mask[I] - VF;
- }
- Value *PrevOp1;
- Value *PrevOp2;
- do {
- PrevOp1 = Op1;
- PrevOp2 = Op2;
- PeekThroughShuffles(Op1, CombinedMask1, /*CheckForLengthChange=*/true);
- PeekThroughShuffles(Op2, CombinedMask2, /*CheckForLengthChange=*/true);
- // Check if we have 2 resizing shuffles - need to peek through operands
- // again.
- if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
- if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2))
- if (SV1->getOperand(0)->getType() ==
- SV2->getOperand(0)->getType() &&
- SV1->getOperand(0)->getType() != SV1->getType() &&
- isUndefVector(SV1->getOperand(1)) &&
- isUndefVector(SV2->getOperand(1))) {
- Op1 = SV1->getOperand(0);
- Op2 = SV2->getOperand(0);
- SmallVector<int> ShuffleMask1(SV1->getShuffleMask().begin(),
- SV1->getShuffleMask().end());
- CombineMasks(ShuffleMask1, CombinedMask1);
- CombinedMask1.swap(ShuffleMask1);
- SmallVector<int> ShuffleMask2(SV2->getShuffleMask().begin(),
- SV2->getShuffleMask().end());
- CombineMasks(ShuffleMask2, CombinedMask2);
- CombinedMask2.swap(ShuffleMask2);
- }
- } while (PrevOp1 != Op1 || PrevOp2 != Op2);
- VF = cast<VectorType>(Op1->getType())
- ->getElementCount()
- .getKnownMinValue();
- for (int I = 0, E = Mask.size(); I < E; ++I) {
- if (CombinedMask2[I] != UndefMaskElem) {
- assert(CombinedMask1[I] == UndefMaskElem &&
- "Expected undefined mask element");
- CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
- }
- }
- Value *Vec = Builder.CreateShuffleVector(
- Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
- CombinedMask1);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherShuffleSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- return Vec;
- }
- if (isa<PoisonValue>(V1))
- return PoisonValue::get(FixedVectorType::get(
- cast<VectorType>(V1->getType())->getElementType(), Mask.size()));
- Value *Op = V1;
- SmallVector<int> CombinedMask(Mask.begin(), Mask.end());
- PeekThroughShuffles(Op, CombinedMask);
- if (!isa<FixedVectorType>(Op->getType()) ||
- !IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
- Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherShuffleSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- return Vec;
- }
- return Op;
- };
-
- auto &&ResizeToVF = [&CreateShuffle](Value *Vec, ArrayRef<int> Mask) {
- unsigned VF = Mask.size();
- unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
- if (VF != VecVF) {
- if (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); })) {
- Vec = CreateShuffle(Vec, nullptr, Mask);
- return std::make_pair(Vec, true);
- }
- SmallVector<int> ResizeMask(VF, UndefMaskElem);
- for (unsigned I = 0; I < VF; ++I) {
- if (Mask[I] != UndefMaskElem)
- ResizeMask[Mask[I]] = Mask[I];
- }
- Vec = CreateShuffle(Vec, nullptr, ResizeMask);
- }
-
- return std::make_pair(Vec, false);
- };
- // Perform shuffling of the vectorize tree entries for better handling of
- // external extracts.
- for (int I = 0, E = ShuffledInserts.size(); I < E; ++I) {
- // Find the first and the last instruction in the list of insertelements.
- sort(ShuffledInserts[I].InsertElements, isFirstInsertElement);
- InsertElementInst *FirstInsert = ShuffledInserts[I].InsertElements.front();
- InsertElementInst *LastInsert = ShuffledInserts[I].InsertElements.back();
- Builder.SetInsertPoint(LastInsert);
- auto Vector = ShuffledInserts[I].ValueMasks.takeVector();
- Value *NewInst = performExtractsShuffleAction<Value>(
- makeMutableArrayRef(Vector.data(), Vector.size()),
- FirstInsert->getOperand(0),
- [](Value *Vec) {
- return cast<VectorType>(Vec->getType())
- ->getElementCount()
- .getKnownMinValue();
- },
- ResizeToVF,
- [FirstInsert, &CreateShuffle](ArrayRef<int> Mask,
- ArrayRef<Value *> Vals) {
- assert((Vals.size() == 1 || Vals.size() == 2) &&
- "Expected exactly 1 or 2 input values.");
- if (Vals.size() == 1) {
- // Do not create shuffle if the mask is a simple identity
- // non-resizing mask.
- if (Mask.size() != cast<FixedVectorType>(Vals.front()->getType())
- ->getNumElements() ||
- !ShuffleVectorInst::isIdentityMask(Mask))
- return CreateShuffle(Vals.front(), nullptr, Mask);
- return Vals.front();
- }
- return CreateShuffle(Vals.front() ? Vals.front()
- : FirstInsert->getOperand(0),
- Vals.back(), Mask);
- });
- auto It = ShuffledInserts[I].InsertElements.rbegin();
- // Rebuild buildvector chain.
- InsertElementInst *II = nullptr;
- if (It != ShuffledInserts[I].InsertElements.rend())
- II = *It;
- SmallVector<Instruction *> Inserts;
- while (It != ShuffledInserts[I].InsertElements.rend()) {
- assert(II && "Must be an insertelement instruction.");
- if (*It == II)
- ++It;
- else
- Inserts.push_back(cast<Instruction>(II));
- II = dyn_cast<InsertElementInst>(II->getOperand(0));
- }
- for (Instruction *II : reverse(Inserts)) {
- II->replaceUsesOfWith(II->getOperand(0), NewInst);
- if (auto *NewI = dyn_cast<Instruction>(NewInst))
- if (II->getParent() == NewI->getParent() && II->comesBefore(NewI))
- II->moveAfter(NewI);
- NewInst = II;
- }
- LastInsert->replaceAllUsesWith(NewInst);
- for (InsertElementInst *IE : reverse(ShuffledInserts[I].InsertElements)) {
- IE->replaceUsesOfWith(IE->getOperand(1),
- PoisonValue::get(IE->getOperand(1)->getType()));
- eraseInstruction(IE);
- }
- CSEBlocks.insert(LastInsert->getParent());
- }
-
// For each vectorized value:
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 68c8555bd6d43..5dc03ab1a3cd1 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -1312,19 +1312,19 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1,
; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP56]]
; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 6, i32 2, i32 10, i32 14, i32 5, i32 1, i32 9, i32 13, i32 4, i32 0, i32 8, i32 12>
-; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> poison, <16 x i32> <i32 5, i32 4, i32 6, i32 7, i32 1, i32 0, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 7, i32 3, i32 11, i32 15, i32 4, i32 0, i32 8, i32 12, i32 5, i32 1, i32 9, i32 13>
; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP60]], [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP61]]
; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> poison, <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 1, i32 0, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23>
; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP64]], [[TMP65]]
; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP65]]
; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 25, i32 24, i32 27, i32 26, i32 29, i32 28, i32 31, i32 30>
; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP68]], [[TMP69]]
; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP68]], [[TMP69]]
; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
-; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP72]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> <i32 2, i32 19, i32 0, i32 17, i32 23, i32 6, i32 5, i32 20, i32 27, i32 10, i32 9, i32 24, i32 31, i32 14, i32 13, i32 28>
; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP72]], [[TMP73]]
; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP73]]
; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index 148169fb64de4..12a2cd2a6834f 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index f5079fab51a6a..03a29f48a739e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -139,7 +139,7 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) {
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0_1]], i64 0
; CHECK-NEXT: [[TMP7:%.*]] = sub <2 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]]
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
index e25fd9b206334..aab848ba73ddb 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -26,15 +26,19 @@ define void @s116_modified(float* %a) {
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP6]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
-; CHECK-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP13]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP17]], align 4
; CHECK-NEXT: ret void
;
%gep0 = getelementptr inbounds float, float* %a, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
index 281169f7a69fe..77174151e9635 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -10,9 +10,12 @@ define void @test() {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0
; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4
; CHECK-NEXT: ret void
;
@@ -43,10 +46,14 @@ define void @test1() {
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: store <2 x float> [[TMP8]], ptr null, align 4
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: store <2 x float> [[TMP9]], ptr null, align 4
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT_I5_I10:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP9]], i64 1
+; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13]], ptr null, align 4
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT_I10_I13_2:%.*]] = insertelement <2 x float> [[DOTSROA_0_0_VEC_INSERT_I5_I10]], float [[TMP10]], i64 1
+; CHECK-NEXT: store <2 x float> [[DOTSROA_0_4_VEC_INSERT_I10_I13_2]], ptr null, align 4
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds float, ptr undef, i32 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index 610049129d398..3b0ec888de661 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -4,12 +4,13 @@
define void @b() {
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0x7FF8000000000000, float undef, float undef, float undef>, <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0x7FF8000000000000, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float undef, float undef, float undef, float 2.000000e+00>
-; CHECK-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP3]], zeroinitializer
-; CHECK-NEXT: store <4 x float> [[TMP4]], ptr undef, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer
+; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4
; CHECK-NEXT: ret void
;
entry:
@@ -49,7 +50,8 @@ define void @test(float %a) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
+; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -69,6 +71,7 @@ define internal void @test1() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
+; CHECK-NEXT: [[DOTSROA_025_4_VEC_INSERT_US_I:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
; CHECK-NEXT: br label [[LOOP]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
index 36347b4843fe7..4463f94b1c557 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -52,12 +52,16 @@ define { <2 x float>, <2 x float> } @test1(i32 %conv.i32.i.i.i) {
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
-; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP6]], i32 0
+; CHECK-NEXT: [[RETVAL_SROA_0_0_VEC_INSERT4:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP7]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 1
+; CHECK-NEXT: [[RETVAL_SROA_0_4_VEC_INSERT7:%.*]] = insertelement <2 x float> [[RETVAL_SROA_0_0_VEC_INSERT4]], float [[TMP8]], i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
+; CHECK-NEXT: [[RETVAL_SROA_7_8_VEC_INSERT11:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP6]], i32 3
+; CHECK-NEXT: [[RETVAL_SROA_7_12_VEC_INSERT13:%.*]] = insertelement <2 x float> [[RETVAL_SROA_7_8_VEC_INSERT11]], float [[TMP10]], i64 1
+; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[RETVAL_SROA_0_4_VEC_INSERT7]], 0
+; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[RETVAL_SROA_7_12_VEC_INSERT13]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } zeroinitializer
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
index 402211b4ef73e..c9590ae76f405 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -13,19 +13,21 @@ define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.
; CHECK-NEXT: [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4
; CHECK-NEXT: br label [[DO_BODY66_I:%.*]]
; CHECK: do.body66.i:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> <i32 undef, i32 poison>, i32 [[TMP2]], i32 1
; CHECK-NEXT: br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]]
; CHECK: if.else.i:
-; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef
+; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], undef
; CHECK-NEXT: br label [[DO_COND_I]]
; CHECK: do.cond.i:
-; CHECK-NEXT: [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ]
+; CHECK-NEXT: [[TMP5]] = phi <2 x i32> [ [[TMP4]], [[IF_ELSE_I]] ], [ [[TMP3]], [[DO_BODY66_I]] ]
; CHECK-NEXT: br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]]
; CHECK: do.end1006.i:
-; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
+; CHECK-NEXT: store <2 x i32> [[TMP6]], <2 x i32>* [[TMP7]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 9b183969001c8..30abc6f9fbe87 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -36,15 +36,17 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(%class.
; CHECK: if.then325:
; CHECK-NEXT: br label [[IF_END327]]
; CHECK: if.end327:
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[TMP4]], i32 0
; CHECK-NEXT: br i1 undef, label [[IF_THEN329:%.*]], label [[IF_END332]]
; CHECK: if.then329:
; CHECK-NEXT: br label [[IF_END332]]
; CHECK: if.end332:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[IF_THEN329]] ], [ [[TMP5]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP3]], [[TMP6]]
; CHECK-NEXT: [[ARRAYIDX_I_I606:%.*]] = getelementptr inbounds [[CLASS_BTVECTOR3_23_221_463_485_507_573_595_683_727_749_815_837_991_1585_1607_1629_1651_1849_2047_2069_2091_2113:%.*]], %class.btVector3.23.221.463.485.507.573.595.683.727.749.815.837.991.1585.1607.1629.1651.1849.2047.2069.2091.2113* [[VERTICES:%.*]], i64 0, i32 0, i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>*
-; CHECK-NEXT: store <2 x float> [[TMP5]], <2 x float>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[ARRAYIDX_I_I606]] to <2 x float>*
+; CHECK-NEXT: store <2 x float> [[TMP7]], <2 x float>* [[TMP8]], align 4
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
index 0344c5a82fbed..ef1ed33ffcb99 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -32,7 +32,7 @@ define void @testfunc(float* nocapture %dest, float* nocapture readonly %src) {
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
; CHECK-NEXT: [[ADD13]] = fadd float [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[ADD13]], i32 1
; CHECK-NEXT: [[TMP16:%.*]] = fcmp olt <2 x float> [[TMP15]], <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP15]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
index 0ab2cf94db827..94739340c8b5a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -35,7 +35,7 @@ define void @exceed(double %0, double %1) {
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP11]], [[TMP9]]
; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP6]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[TMP7]], i32 0
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[TMP14]], undef
; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [
; CHECK-NEXT: i32 0, label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
index e87a5190d4721..fa8c8a14e891c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -129,10 +129,11 @@ define fastcc void @dct36(double* %inbuf) {
; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 1c7cd3fd265d5..bbd71825f96c6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -20,15 +20,18 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 6b9fdb23714fb..cd05b940be221 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -20,15 +20,18 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
+; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
+; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
+; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
+; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0
+; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index 9bfde3976d2cc..723c12d9b05b6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -115,15 +115,16 @@ define void @_Z8radianceRK3RayiPt() #0 {
; CHECK-NEXT: br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
-; CHECK-NEXT: [[TMP0:%.*]] = fmul <2 x double> undef, undef
-; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> undef, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> undef, [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double undef, double poison>, double undef, i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> undef, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> undef, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.then78:
; CHECK-NEXT: br label [[RETURN]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
index ea4de7916097d..4c15a89f8f005 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement.ll
@@ -82,19 +82,23 @@ define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-NEXT: ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
-; THRESH1-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
-; THRESH1-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
-; THRESH1-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; THRESH1-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; THRESH1-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; THRESH1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
+; THRESH1-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
+; THRESH1-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH1-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH1-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH1-NEXT: ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
-; THRESH2-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
-; THRESH2-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
-; THRESH2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; THRESH2-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
-; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
+; THRESH2-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
+; THRESH2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
+; THRESH2-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1
+; THRESH2-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[X]]
+; THRESH2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; THRESH2-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; THRESH2-NEXT: [[ADD:%.*]] = fadd float [[TMP5]], [[TMP6]]
; THRESH2-NEXT: ret float [[ADD]]
;
%x0 = extractelement <2 x float> %x, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index bafc67c9ea31f..f446f3682e580 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -896,11 +896,12 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
; THRESHOLD-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 1
; THRESHOLD-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[MUL]], i32 0
; THRESHOLD-NEXT: [[TMP5:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x float>
-; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> <float poison, float 3.000000e+00>, <2 x float> [[TMP5]], <2 x i32> <i32 2, i32 1>
-; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP6]]
-; THRESHOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
-; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
-; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
+; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
+; THRESHOLD-NEXT: [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+00>, float [[TMP6]], i32 0
+; THRESHOLD-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP5]], [[TMP7]]
+; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
+; THRESHOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
+; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[TMP9]], [[TMP10]]
; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[TMP2]], [[OP_RDX2]]
; THRESHOLD-NEXT: ret float [[OP_RDX3]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index 801b4863e7c19..7c4aed1453c97 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1100,14 +1100,15 @@ define i32 @maxi8_wrong_parent(i32) {
; THRESH-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i1> poison, i1 [[OP_RDX]], i32 0
; THRESH-NEXT: [[TMP12:%.*]] = insertelement <2 x i1> [[TMP11]], i1 [[TMP5]], i32 1
-; THRESH-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 2>
-; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
-; THRESH-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
-; THRESH-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP15]]
-; THRESH-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
-; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
-; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
-; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP17]], i32 [[TMP18]]
+; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
+; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP3]], i32 1
+; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
+; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP4]], i32 1
+; THRESH-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP14]], <2 x i32> [[TMP16]]
+; THRESH-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
+; THRESH-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
+; THRESH-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]]
+; THRESH-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP18]], i32 [[TMP19]]
; THRESH-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[TMP8]], [[OP_RDX3]]
; THRESH-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[TMP8]], i32 [[OP_RDX3]]
; THRESH-NEXT: ret i32 [[OP_RDX5]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
index 01979d12fb3ac..470fa0caa1cd1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -439,9 +439,12 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @reschedule_extract(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -474,9 +477,12 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @take_credit(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -533,9 +539,12 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @_vadd256(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> <i32 7, i32 15>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 06cdff548a91f..d7adf196f7c2a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -474,9 +474,12 @@ define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @reschedule_extract(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -509,9 +512,12 @@ define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; NOTHRESHOLD-NEXT: ret <4 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @take_credit(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <2 x i32> <i32 3, i32 7>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <4 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <4 x float> [[TMP5]]
;
%a0 = extractelement <4 x float> %a, i32 0
%b0 = extractelement <4 x float> %b, i32 0
@@ -568,9 +574,12 @@ define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr
; NOTHRESHOLD-NEXT: ret <8 x float> [[TMP1]]
;
; MINTREESIZE-LABEL: @_vadd256(
-; MINTREESIZE-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <2 x i32> <i32 7, i32 15>
-; MINTREESIZE-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
-; MINTREESIZE-NEXT: ret <8 x float> [[TMP2]]
+; MINTREESIZE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[B:%.*]], i32 7
+; MINTREESIZE-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[A:%.*]], i32 7
+; MINTREESIZE-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
+; MINTREESIZE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP1]], i32 1
+; MINTREESIZE-NEXT: [[TMP5:%.*]] = fadd <8 x float> [[A]], [[B]]
+; MINTREESIZE-NEXT: ret <8 x float> [[TMP5]]
;
%vecext = extractelement <8 x float> %a, i32 0
%vecext1 = extractelement <8 x float> %b, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
index b64c0e4e35a23..5a9273d1e5b3e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
@@ -19,10 +19,16 @@ define { <2 x float>, <2 x float> } @foo(%struct.sw* %v) {
; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison
; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison
; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP10]], 0
-; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[TMP11]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT: [[VEC1:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
+; CHECK-NEXT: [[VEC2:%.*]] = insertelement <2 x float> [[VEC1]], float [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
+; CHECK-NEXT: [[VEC3:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
+; CHECK-NEXT: [[VEC4:%.*]] = insertelement <2 x float> [[VEC3]], float [[TMP13]], i32 1
+; CHECK-NEXT: [[INS1:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VEC2]], 0
+; CHECK-NEXT: [[INS2:%.*]] = insertvalue { <2 x float>, <2 x float> } [[INS1]], <2 x float> [[VEC4]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[INS2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index 011ce7352079c..d785f8e76798b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -9,9 +9,10 @@ define i32 @fn1() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
; CHECK-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
index 85aa8aed5f5a5..31c5d5b07f0e0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
@@ -58,11 +58,18 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP5]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
+; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
; CHECK-NEXT: ret i32 undef
;
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
index 78572209687b8..6bbde0957743c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll
@@ -39,13 +39,14 @@ define dso_local void @j() local_unnamed_addr {
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
; CHECK-NEXT: store float [[TMP15]], float* @f, align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> <float poison, float -1.000000e+00, float poison, float -1.000000e+00>, float [[CONV19]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> [[SHUFFLE]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
-; CHECK-NEXT: [[TMP18:%.*]] = fsub <4 x float> [[TMP11]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = fadd <4 x float> [[TMP11]], [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP19]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP21:%.*]] = fptosi <4 x float> [[TMP20]] to <4 x i32>
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* [[TMP22]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP17]], i32 2
+; CHECK-NEXT: [[TMP19:%.*]] = fsub <4 x float> [[TMP11]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = fadd <4 x float> [[TMP11]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP19]], <4 x float> [[TMP20]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP22:%.*]] = fptosi <4 x float> [[TMP21]] to <4 x i32>
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX1]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
index 6beff0162ffd2..3d7d632cca3d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll
@@ -59,8 +59,9 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
-; CHECK-NEXT: ret <4 x float> [[TMP4]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[I3]]
;
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
index dd08e7983d70c..509c37f5da28c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -59,8 +59,9 @@ define <4 x float> @PR16739_byref(<4 x float>* nocapture readonly dereferenceabl
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
-; CHECK-NEXT: ret <4 x float> [[TMP4]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I21]], float [[TMP4]], i32 3
+; CHECK-NEXT: ret <4 x float> [[I3]]
;
%gep0 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 0
%gep1 = getelementptr inbounds <4 x float>, <4 x float>* %x, i64 0, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index faf86fbcafb9d..36b867d4a148f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -722,11 +722,15 @@ define double @splat_loads(double *%array1, double *%array2, double *%ptrA, doub
; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; SSE-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP7]], [[TMP8]]
+; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
+; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
+; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
+; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
+; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
+; SSE-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
+; SSE-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
+; SSE-NEXT: [[ADD3:%.*]] = fadd double [[TMP11]], [[TMP12]]
; SSE-NEXT: ret double [[ADD3]]
;
; AVX-LABEL: @splat_loads(
@@ -787,13 +791,17 @@ define double @splat_loads_with_internal_uses(double *%array1, double *%array2,
; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; SSE-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
; SSE-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[SHUFFLE]]
-; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
-; SSE-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x double> [[TMP6]], [[TMP7]]
-; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP8]], i32 0
-; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP8]], i32 1
-; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP9]], [[TMP10]]
+; SSE-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 1
+; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; SSE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SHUFFLE]], i32 0
+; SSE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
+; SSE-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP1]], [[TMP8]]
+; SSE-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP4]], [[TMP9]]
+; SSE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
+; SSE-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP10]], [[TMP11]]
+; SSE-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP12]], i32 0
+; SSE-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP12]], i32 1
+; SSE-NEXT: [[RES:%.*]] = fadd double [[TMP13]], [[TMP14]]
; SSE-NEXT: ret double [[RES]]
;
; AVX-LABEL: @splat_loads_with_internal_uses(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
index fc18281c3f35e..464288ef340c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
@@ -29,9 +29,10 @@ define void @f(i1 %x) #0 {
; CHECK: if.then:
; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP8]], 8
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = and <2 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i64> [[TMP11]], [[TMP7]]
+; CHECK-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (%struct.a* @a to <2 x i64>*), align 8
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index c6725da8a9869..df767259b4f47 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -151,7 +151,7 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00
@@ -163,22 +163,25 @@ define float @foo3(float* nocapture readonly %A) #0 {
; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
; CHECK-NEXT: [[TMP12]] = load <2 x float>, <2 x float>* [[TMP11]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 undef, i32 2, i32 3>
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
-; CHECK-NEXT: [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> poison, float [[TMP13]], i32 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP10]], i32 1
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP17]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
+; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0
-; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 0
+; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 1
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 2
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 3
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]]
; CHECK-NEXT: ret float [[ADD31]]
;
entry:
@@ -241,20 +244,27 @@ define float @sort_phi_type(float* nocapture readonly %A) {
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
-; CHECK-NEXT: [[TMP2]] = fmul <4 x float> [[TMP1]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3
+; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP13]]
; CHECK-NEXT: ret float [[ADD31]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
index 595b5c9d1079b..8ab5fb7c7ceec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022-inseltpoison.ll
@@ -10,10 +10,16 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0
-; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
+; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
+; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
;
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
index e97a3a4521482..3177f8557315d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll
@@ -10,10 +10,16 @@ define { <2 x float>, <2 x float> } @StructOfVectors(float *%Ptr) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.100000e+01, float 1.200000e+01, float 1.300000e+01, float 1.400000e+01>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP4]], 0
-; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[TMP5]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[VECIN0:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP3]], i32 1
+; CHECK-NEXT: [[VECIN1:%.*]] = insertelement <2 x float> [[VECIN0]], float [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP3]], i32 2
+; CHECK-NEXT: [[VECIN2:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP3]], i32 3
+; CHECK-NEXT: [[VECIN3:%.*]] = insertelement <2 x float> [[VECIN2]], float [[TMP7]], i64 1
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[VECIN1]], 0
+; CHECK-NEXT: [[RET1:%.*]] = insertvalue { <2 x float>, <2 x float> } [[RET0]], <2 x float> [[VECIN3]], 1
; CHECK-NEXT: ret { <2 x float>, <2 x float> } [[RET1]]
;
%GEP0 = getelementptr inbounds float, float* %Ptr, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
index aa93abaf4d9cb..1fd2f30bf93cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-same-vals.ll
@@ -10,10 +10,18 @@ define i64 @test() {
; CHECK: bb3:
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ poison, [[BB2:%.*]] ], [ zeroinitializer, [[BB1:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]])
-; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP1]], i32 3
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP1]], i32 4
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP1]], i32 5
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP1]], i32 6
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP1]], i32 7
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[SHUFFLE]])
+; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP65:%.*]] = sext i32 [[OP_RDX]] to i64
; CHECK-NEXT: ret i64 [[TMP65]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index f71e214fe8938..7ab7f3b67634b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -9,10 +9,12 @@ define void @fextr(i16* %ptr) {
; CHECK-NEXT: br label [[T:%.*]]
; CHECK: t:
; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-; CHECK-NEXT: [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
; CHECK-NEXT: ret void
;
; YAML: Pass: slp-vectorizer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
index fc1a673d818f9..3315e71ae2aa6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll
@@ -12,15 +12,22 @@ define void @test() {
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[SHUFFLE]]
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw <4 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP13]], i32 2
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP15]], i32 3
+; CHECK-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <4 x i32> [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP19]]
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 16
; CHECK-NEXT: ret void
;
%1 = getelementptr inbounds i8, i8* undef, i64 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
index bb0a39b7b6fb8..9bbe2d5d673c7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reordered-top-scalars.ll
@@ -7,18 +7,21 @@ define i32 @test(i32* %isec) {
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[ISEC:%.*]], i32 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX10]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i32 1
; CHECK-NEXT: br i1 false, label [[BLOCK1:%.*]], label [[BLOCK3:%.*]]
; CHECK: block1:
; CHECK-NEXT: br i1 false, label [[BLOCK2:%.*]], label [[BLOCK3]]
; CHECK: block2:
; CHECK-NEXT: br label [[BLOCK3]]
; CHECK: block3:
-; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP2]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: ret i32 [[TMP6]]
+; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP1]], [[BLOCK1]] ], [ [[TMP1]], [[BLOCK2]] ], [ [[TMP5]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP9]]
;
entry:
%arrayidx10 = getelementptr inbounds i32, i32* %isec, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
index 0a07e03bac72c..297aa493ed071 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
@@ -49,9 +49,15 @@ define void @test(i32* nocapture %t2) {
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
-; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
+; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
+; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5
+; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
+; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
index 3cf139ea0469b..b0a406e6a8f51 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
@@ -49,9 +49,15 @@ define void @test(i32* nocapture %t2) {
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 3>
-; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6
-; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP10]], i32 0
+; CHECK-NEXT: [[T69:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP12]], i32 4
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP10]], i32 1
+; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T69]], i32 [[TMP13]], i32 5
+; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP10]], i32 3
+; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP14]], i32 7
+; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
index f2fbf1dfc756a..109c27e4f4f4e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -12,7 +12,7 @@ define void @foo() {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
-; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP18:%.*]], [[BB3:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = load double, double* undef, align 8
; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]]
; CHECK: bb4:
@@ -23,13 +23,17 @@ define void @foo() {
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
-; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
-; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> poison, double [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP12]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x double> [[TMP13]], [[TMP4]]
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP16:%.*]] = fptrunc <4 x double> [[TMP15]] to <4 x float>
+; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP2]], <4 x float> [[TMP16]]
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT: [[TMP18]] = phi <4 x float> [ [[TMP17]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
; CHECK-NEXT: br label [[BB2]]
;
entry:
More information about the llvm-commits
mailing list