[llvm] 2bb0fa4 - [SLP]Prefer copyable over alternate
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 09:00:04 PDT 2026
Author: Alexey Bataev
Date: 2026-03-20T11:59:59-04:00
New Revision: 2bb0fa46a8166b68495ac5623d4cbaad39d95cea
URL: https://github.com/llvm/llvm-project/commit/2bb0fa46a8166b68495ac5623d4cbaad39d95cea
DIFF: https://github.com/llvm/llvm-project/commit/2bb0fa46a8166b68495ac5623d4cbaad39d95cea.diff
LOG: [SLP]Prefer copyable over alternate
If the instruction state is alternate and/or contains instructions that do
not match directly, we need to check whether it is better to represent such
operations as non-alternate with copyables.
To do this, we need to compare operands between the instructions in their
different representations and choose the best one for optimal
vectorization.
Reviewers: RKSimon, hiraditya
Pull Request: https://github.com/llvm/llvm-project/pull/183777
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/PhaseOrdering/AArch64/scalarize-load-ext-extract.ll
llvm/test/Transforms/PhaseOrdering/X86/avg.ll
llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll
llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
llvm/test/Transforms/SLPVectorizer/X86/commutable-node-with-non-sched-parent.ll
llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
llvm/test/Transforms/SLPVectorizer/X86/resched.ll
llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll
llvm/test/Transforms/SLPVectorizer/X86/shl-compatible-with-add.ll
llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation.ll
llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll
llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f3f4081f84ef7..d55605055c2ec 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2603,6 +2603,19 @@ class slpvectorizer::BoUpSLP {
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
return CheckSameEntryOrFail();
+ Value *V;
+ Value *Cond;
+ // ZExt i1 to something must be considered same opcode for select i1
+ // cmp, x, y
+ // Required to better match the transformation after
+ // BoUpSLP::matchesInversedZExtSelect analysis.
+ if ((match(I1, m_ZExt(m_Value(V))) &&
+ match(I2, m_Select(m_Value(Cond), m_Value(), m_Value())) &&
+ V->getType() == Cond->getType()) ||
+ (match(I2, m_ZExt(m_Value(V))) &&
+ match(I1, m_Select(m_Value(Cond), m_Value(), m_Value())) &&
+ V->getType() == Cond->getType()))
+ return LookAheadHeuristics::ScoreSameOpcode;
SmallVector<Value *, 4> Ops(MainAltOps);
Ops.push_back(I1);
Ops.push_back(I2);
@@ -3550,7 +3563,7 @@ class slpvectorizer::BoUpSLP {
/// root of profitable tree to vectorize. Return std::nullopt if no candidate
/// scored above the LookAheadHeuristics::ScoreFail. \param Limit Lower limit
/// of the cost, considered to be good enough score.
- std::optional<int>
+ std::pair<std::optional<int>, int>
findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
int Limit = LookAheadHeuristics::ScoreFail) const {
LookAheadHeuristics LookAhead(*TLI, *DL, *SE, *this, /*NumLanes=*/2,
@@ -3567,7 +3580,7 @@ class slpvectorizer::BoUpSLP {
Index = I;
}
}
- return Index;
+ return std::make_pair(Index, BestScore);
}
/// Checks if the instruction is marked for deletion.
@@ -4654,8 +4667,7 @@ class slpvectorizer::BoUpSLP {
/// in general.
ScalarsVectorizationLegality
getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx,
- bool TryCopyableElementsVectorization) const;
+ const EdgeInfo &UserTreeIdx) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -11453,26 +11465,226 @@ class InstructionsCompatibilityAnalysis {
llvm_unreachable("Unexpected vectorization of the instructions.");
}
+ /// Check if the specified \p VL list of values is better to represent as
+ /// uniform with copyables, as modeled via \p CopyableS, or as alternate (or
+ /// uniform with compatible ops), modeled via \p S.
+ /// Performs the analysis of the operands, choosing the preferred main
+ /// instruction and checking the matching of the operands for the main
+ /// instruction and copyable elements.
+ bool isCopyablePreferable(ArrayRef<Value *> VL, const BoUpSLP &R,
+ const InstructionsState &S,
+ const InstructionsState &CopyableS) {
+ // If all elements are vectorized already - keep as is.
+ if (all_of(VL, [&](Value *V) {
+ return isa<PoisonValue>(V) || R.isVectorized(V);
+ }))
+ return false;
+ Instruction *SMain = S.getMainOp();
+ Instruction *SAlt = S.isAltShuffle() ? S.getAltOp() : nullptr;
+ const bool IsCommutative = ::isCommutative(SMain);
+ const bool IsAltCommutative =
+ S.isAltShuffle() ? ::isCommutative(SAlt) : false;
+ const bool IsMainCommutative = ::isCommutative(MainOp);
+ SmallVector<BoUpSLP::ValueList> Ops;
+ buildOriginalOperands(S, SMain, Ops);
+ // Support only binary operations for now.
+ if (Ops.size() != 2)
+ return false;
+ // Try to find better candidate for S main instruction, which operands have
+ // better matching.
+ auto CheckOperands = [](Value *Op, Value *SMainOp) {
+ auto *OpI = dyn_cast<BinaryOperator>(Op);
+ if (!OpI)
+ return false;
+ auto *SMainOpI = dyn_cast<BinaryOperator>(SMainOp);
+ if (!SMainOpI)
+ return true;
+ return any_of(OpI->operands(), [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return I && I->getOpcode() == SMainOpI->getOpcode();
+ });
+ };
+ SmallPtrSet<Value *, 8> Operands;
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || I == SMain)
+ continue;
+ Instruction *MatchingOp = S.getMatchingMainOpOrAltOp(I);
+ if (MatchingOp != SMain)
+ continue;
+ SmallVector<BoUpSLP::ValueList> VOps;
+ buildOriginalOperands(S, I, VOps);
+ Operands.insert(I->op_begin(), I->op_end());
+ assert(VOps.size() == 2 && Ops.size() == 2 &&
+ "Expected binary operations only.");
+ if (CheckOperands(VOps[0][0], Ops[0][0]) ||
+ CheckOperands(VOps[1][0], Ops[1][0]) ||
+ (IsCommutative && (CheckOperands(VOps[0][0], Ops[1][0]) ||
+ CheckOperands(VOps[1][0], Ops[0][0])))) {
+ SMain = I;
+ Ops.swap(VOps);
+ break;
+ }
+ }
+ SmallVector<BoUpSLP::ValueList> MainOps;
+ buildOriginalOperands(S, MainOp, MainOps);
+
+ auto BuildFirstOperandCandidates =
+ [&](SmallVectorImpl<std::pair<Value *, Value *>> &Candidates,
+ ArrayRef<BoUpSLP::ValueList> Ops, Value *Op0, Value *Op1,
+ bool IsCommutative) {
+ Candidates.emplace_back(Ops[0][0], Op0);
+ if (IsCommutative)
+ Candidates.emplace_back(Ops[0][0], Op1);
+ };
+
+ auto BuildSecondOperandCandidates =
+ [&](SmallVectorImpl<std::pair<Value *, Value *>> &Candidates,
+ ArrayRef<BoUpSLP::ValueList> Ops, int PrevBestIdx, Value *Op0,
+ Value *Op1, bool IsCommutative) {
+ if (PrevBestIdx != 1)
+ Candidates.emplace_back(Ops[1][0], Op1);
+ if (PrevBestIdx != 0 && IsCommutative)
+ Candidates.emplace_back(Ops[1][0], Op0);
+ };
+
+ auto FindBestCandidate =
+ [&](ArrayRef<std::pair<Value *, Value *>> Candidates, bool &IsConst,
+ int &Score) {
+ auto Res = R.findBestRootPair(Candidates);
+ Score = Res.second;
+ IsConst =
+ Res.second == BoUpSLP::LookAheadHeuristics::ScoreConstants &&
+ isConstant(Candidates[Res.first.value_or(0)].first) &&
+ isConstant(Candidates[Res.first.value_or(0)].second);
+ if (IsConst) {
+ // Check if there are splat candidates and consider them better
+ // option.
+ for (const auto [Idx, P] : enumerate(Candidates)) {
+ if (!isConstant(P.first) && !isConstant(P.second) &&
+ P.second == P.first) {
+ Res.first = Idx;
+ IsConst = false;
+ Score = isa<LoadInst>(Candidates[Res.first.value_or(0)].first)
+ ? BoUpSLP::LookAheadHeuristics::ScoreSplatLoads
+ : BoUpSLP::LookAheadHeuristics::ScoreSplat;
+ break;
+ }
+ }
+ }
+ return Res.first;
+ };
+
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || (I == MainOp && (!S.isAltShuffle() || I == SMain)) ||
+ (!S.isAltShuffle() && I == SMain))
+ continue;
+ SmallVector<BoUpSLP::ValueList> VOps;
+ buildOriginalOperands(S, I == SMain ? MainOp : I, VOps);
+ SmallVector<Value *> CopyableOps =
+ getOperands(CopyableS, I == MainOp ? SMain : I);
+ if (CopyableOps.size() == VOps.size() &&
+ all_of(zip(CopyableOps, VOps), [&](const auto &P) {
+ return std::get<0>(P) == std::get<1>(P)[0];
+ }))
+ continue;
+ SmallVector<std::pair<Value *, Value *>> Candidates;
+ BuildFirstOperandCandidates(Candidates, MainOps, CopyableOps[0],
+ CopyableOps[1], IsMainCommutative);
+ const unsigned OpSize = Candidates.size();
+ Instruction *MatchingOp =
+ S.getMatchingMainOpOrAltOp(I) == S.getMainOp() ? SMain : SAlt;
+ const bool IsCommutativeInst =
+ (MatchingOp == SMain ? IsCommutative : IsAltCommutative) ||
+ ::isCommutative(I, MatchingOp);
+ if (S.isAltShuffle() && MatchingOp == SAlt &&
+ any_of(VOps, [&](const BoUpSLP::ValueList &Ops) {
+ auto *I = dyn_cast<BinaryOperator>(Ops[0]);
+ return I && Operands.contains(I);
+ }))
+ return false;
+ if (S.isAltShuffle() && MatchingOp == SMain)
+ Operands.insert(I->op_begin(), I->op_end());
+ BuildFirstOperandCandidates(Candidates, Ops, VOps[0][0], VOps[1][0],
+ IsCommutativeInst);
+ bool IsBestConst;
+ int Score;
+ std::optional<int> BestOp =
+ FindBestCandidate(Candidates, IsBestConst, Score);
+ const bool IsOriginalBetter =
+ static_cast<unsigned>(BestOp.value_or(OpSize)) >= OpSize;
+ Candidates.clear();
+ BuildSecondOperandCandidates(
+ Candidates, MainOps, IsOriginalBetter ? -1 : *BestOp, CopyableOps[0],
+ CopyableOps[1], IsMainCommutative);
+ const unsigned SecondOpSize = Candidates.size();
+ BuildSecondOperandCandidates(
+ Candidates, Ops,
+ IsOriginalBetter ? BestOp.value_or(OpSize - 1) - OpSize : -1,
+ VOps[0][0], VOps[1][0], IsCommutativeInst);
+ bool IsSecondBestConst;
+ int SecondScore;
+ std::optional<int> SecondBestOp =
+ FindBestCandidate(Candidates, IsSecondBestConst, SecondScore);
+ // No best candidates.
+ if (!BestOp && !SecondBestOp)
+ return false;
+ // Original better in both ops combinations.
+ const bool IsSecondOriginalBetter =
+ static_cast<unsigned>(SecondBestOp.value_or(SecondOpSize)) >=
+ SecondOpSize;
+ if (IsOriginalBetter && IsSecondOriginalBetter)
+ return false;
+ // Original is better in second combination, but in the first combination
+ // no best candidates.
+ if (!BestOp && IsSecondOriginalBetter)
+ return false;
+ // Original is better in first combination, but in the second combination
+ // no best candidates.
+ if (!SecondBestOp && IsOriginalBetter)
+ return false;
+ // Copyable is best in the first combination, but it is constant, but
+ // original is better in second non-constant combination.
+ if (!IsOriginalBetter && IsBestConst && IsSecondOriginalBetter &&
+ !IsSecondBestConst)
+ return false;
+ // Copyable is best in the second combination, but it is constant, but
+ // original is better in the first non-constant combination.
+ if (BestOp && IsOriginalBetter && !IsBestConst &&
+ !IsSecondOriginalBetter && IsSecondBestConst)
+ return false;
+ // Original combination score is better.
+ if (((Score > SecondScore ||
+ (Score <= BoUpSLP::LookAheadHeuristics::ScoreAltOpcodes &&
+ Score == SecondScore)) &&
+ IsOriginalBetter) ||
+ (IsSecondOriginalBetter &&
+ (SecondScore > Score ||
+ (Score <= BoUpSLP::LookAheadHeuristics::ScoreAltOpcodes &&
+ Score == SecondScore))))
+ return false;
+ }
+ return true;
+ }
+
public:
InstructionsCompatibilityAnalysis(DominatorTree &DT, const DataLayout &DL,
const TargetTransformInfo &TTI,
const TargetLibraryInfo &TLI)
: DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
- InstructionsState
- buildInstructionsState(ArrayRef<Value *> VL, const BoUpSLP &R,
- bool TryCopyableElementsVectorization,
- bool WithProfitabilityCheck = false,
- bool SkipSameCodeCheck = false) {
+ InstructionsState buildInstructionsState(ArrayRef<Value *> VL,
+ const BoUpSLP &R,
+ bool WithProfitabilityCheck = false,
+ bool SkipSameCodeCheck = false) {
InstructionsState S = (SkipSameCodeCheck || !allSameBlock(VL))
? InstructionsState::invalid()
: getSameOpcode(VL, TLI);
- if (S)
- return S;
// Check if series of selects + zext i1 %x to in can be combined into
// selects + select %x, i32 1, i32 0.
Instruction *SelectOp = nullptr;
- if (allSameBlock(VL) && all_of(VL, [&](Value *V) {
+ if (!S && allSameBlock(VL) && all_of(VL, [&](Value *V) {
if (match(V, m_Select(m_Value(), m_Value(), m_Value()))) {
if (!SelectOp)
SelectOp = cast<Instruction>(V);
@@ -11485,12 +11697,33 @@ class InstructionsCompatibilityAnalysis {
if (SelectOp)
return InstructionsState(SelectOp, SelectOp);
}
- if (!VectorizeCopyableElements || !TryCopyableElementsVectorization)
+ if (S && S.isAltShuffle()) {
+ Type *ScalarTy = S.getMainOp()->getType();
+ VectorType *VecTy = getWidenedType(ScalarTy, VL.size());
+ unsigned Opcode0 = S.getOpcode();
+ unsigned Opcode1 = S.getAltOpcode();
+ SmallBitVector OpcodeMask(
+ getAltInstrMask(VL, ScalarTy, Opcode0, Opcode1));
+ // If this pattern is supported by the target then we consider the order.
+ if (TTI.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask))
+ return S;
+ } else if (S && (!VectorizeCopyableElements ||
+ !isa<BinaryOperator>(S.getMainOp()) ||
+ all_of(VL, [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || I->getOpcode() == S.getOpcode();
+ }))) {
+ return S;
+ }
+ if (!VectorizeCopyableElements)
return S;
findAndSetMainInstruction(VL, R);
if (!MainOp)
- return InstructionsState::invalid();
+ return S;
+ InstructionsState OrigS = S;
S = InstructionsState(MainOp, MainOp, /*HasCopyables=*/true);
+ if (OrigS && !isCopyablePreferable(VL, R, OrigS, S))
+ return OrigS;
if (!WithProfitabilityCheck)
return S;
// Check if it is profitable to vectorize the instruction.
@@ -11513,19 +11746,19 @@ class InstructionsCompatibilityAnalysis {
BuildCandidates(Candidates1, Operands[0][0], Operands[0][1]);
BuildCandidates(Candidates2, Operands[1][0], Operands[1][1]);
bool Res = !Candidates1.empty() && !Candidates2.empty() &&
- R.findBestRootPair(Candidates1) &&
- R.findBestRootPair(Candidates2);
+ R.findBestRootPair(Candidates1).first &&
+ R.findBestRootPair(Candidates2).first;
if (!Res && isCommutative(MainOp)) {
Candidates1.clear();
Candidates2.clear();
BuildCandidates(Candidates1, Operands[0][0], Operands[1][1]);
BuildCandidates(Candidates2, Operands[1][0], Operands[0][1]);
Res = !Candidates1.empty() && !Candidates2.empty() &&
- R.findBestRootPair(Candidates1) &&
- R.findBestRootPair(Candidates2);
+ R.findBestRootPair(Candidates1).first &&
+ R.findBestRootPair(Candidates2).first;
}
if (!Res)
- return InstructionsState::invalid();
+ return OrigS;
constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
InstructionCost ScalarCost = TTI.getInstructionCost(S.getMainOp(), Kind);
InstructionCost VectorCost;
@@ -11551,7 +11784,7 @@ class InstructionsCompatibilityAnalysis {
llvm_unreachable("Unexpected instruction.");
}
if (VectorCost > ScalarCost)
- return InstructionsState::invalid();
+ return OrigS;
return S;
}
assert(Operands.size() == 2 && "Unexpected number of operands!");
@@ -11567,7 +11800,7 @@ class InstructionsCompatibilityAnalysis {
all_of(VL, [&](Value *V) {
return isa<PHINode>(V) || !S.isCopyableElement(V);
}))
- return InstructionsState::invalid();
+ return OrigS;
// Check profitability if number of copyables > VL.size() / 2.
// 1. Reorder operands for better matching.
if (isCommutative(MainOp)) {
@@ -11587,7 +11820,7 @@ class InstructionsCompatibilityAnalysis {
}
// 2. Check, if operands can be vectorized.
if (count_if(Operands.back(), IsaPred<Instruction>) > 1)
- return InstructionsState::invalid();
+ return OrigS;
auto CheckOperand = [&](ArrayRef<Value *> Ops) {
if (allConstant(Ops) || isSplat(Ops))
return true;
@@ -11610,8 +11843,7 @@ class InstructionsCompatibilityAnalysis {
// First operand not a constant or splat? Last attempt - check for
// potential vectorization.
InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
- InstructionsState OpS = Analysis.buildInstructionsState(
- Ops, R, /*TryCopyableElementsVectorization=*/true);
+ InstructionsState OpS = Analysis.buildInstructionsState(Ops, R);
if (!OpS || (OpS.getOpcode() == Instruction::PHI && !allSameBlock(Ops)))
return false;
unsigned CopyableNum =
@@ -11619,7 +11851,7 @@ class InstructionsCompatibilityAnalysis {
return CopyableNum <= VL.size() / 2;
};
if (!CheckOperand(Operands.front()))
- return InstructionsState::invalid();
+ return OrigS;
return S;
}
@@ -11646,15 +11878,14 @@ class InstructionsCompatibilityAnalysis {
};
} // namespace
-BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
- ArrayRef<Value *> VL, unsigned Depth, const EdgeInfo &UserTreeIdx,
- bool TryCopyableElementsVectorization) const {
+BoUpSLP::ScalarsVectorizationLegality
+BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
+ const EdgeInfo &UserTreeIdx) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
InstructionsState S = Analysis.buildInstructionsState(
- VL, *this, TryCopyableElementsVectorization,
- /*WithProfitabilityCheck=*/true, TryCopyableElementsVectorization);
+ VL, *this, /*WithProfitabilityCheck=*/true);
bool AreScatterAllGEPSameBlock = false;
if (!S) {
@@ -11915,8 +12146,8 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return;
}
- ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality(
- VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false);
+ ScalarsVectorizationLegality Legality =
+ getScalarsVectorizationLegality(VL, Depth, UserTreeIdx);
InstructionsState S = Legality.getInstructionsState();
if (!Legality.isLegal()) {
if (Legality.trySplitVectorize()) {
@@ -11925,18 +12156,11 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
return;
}
- if (!S)
- Legality = getScalarsVectorizationLegality(
- VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true);
- if (!Legality.isLegal()) {
- if (Legality.tryToFindDuplicates())
- tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S,
- UserTreeIdx);
+ if (Legality.tryToFindDuplicates())
+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
- newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
- return;
- }
- S = Legality.getInstructionsState();
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
+ return;
}
// FIXME: investigate if there are profitable cases for VL.size() <= 4.
@@ -13687,15 +13911,15 @@ void BoUpSLP::transformNodes() {
for (unsigned Op : seq<unsigned>(S.getMainOp()->getNumOperands()))
Candidates.emplace_back().emplace_back(I1->getOperand(Op),
I2->getOperand(Op));
- return all_of(
- Candidates, [this](ArrayRef<std::pair<Value *, Value *>> Cand) {
- return all_of(Cand,
- [](const std::pair<Value *, Value *> &P) {
- return isa<Constant>(P.first) ||
- isa<Constant>(P.second) || P.first == P.second;
- }) ||
- findBestRootPair(Cand, LookAheadHeuristics::ScoreSplatLoads);
- });
+ return all_of(Candidates, [this](
+ ArrayRef<std::pair<Value *, Value *>> Cand) {
+ return all_of(Cand,
+ [](const std::pair<Value *, Value *> &P) {
+ return isa<Constant>(P.first) ||
+ isa<Constant>(P.second) || P.first == P.second;
+ }) ||
+ findBestRootPair(Cand, LookAheadHeuristics::ScoreSplatLoads).first;
+ });
};
// Try to reorder gather nodes for better vectorization opportunities.
@@ -24842,8 +25066,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
ValOps.insert(cast<StoreInst>(V)->getValueOperand());
// Operands are not same/alt opcodes or non-power-of-2 uniques - exit.
InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
- InstructionsState S = Analysis.buildInstructionsState(
- ValOps.getArrayRef(), R, /*TryCopyableElementsVectorization=*/true);
+ InstructionsState S =
+ Analysis.buildInstructionsState(ValOps.getArrayRef(), R);
if (all_of(ValOps, IsaPred<Instruction>) && ValOps.size() > 1) {
DenseSet<Value *> Stores(Chain.begin(), Chain.end());
bool IsAllowedSize =
@@ -26321,8 +26545,8 @@ class HorizontalReduction {
Ops.append(RV.begin(), RV.end());
InstructionsCompatibilityAnalysis Analysis(DT, DL, *TTI, TLI);
InstructionsState OpS = Analysis.buildInstructionsState(
- Ops, V, /*TryCopyableElementsVectorization=*/true,
- /*WithProfitabilityCheck=*/true, /*SkipSameCodeCheck=*/true);
+ Ops, V, /*WithProfitabilityCheck=*/true,
+ /*SkipSameCodeCheck=*/true);
if (OpS && OpS.areInstructionsWithCopyableElements()) {
if (LocalReducedVals.empty()) {
LocalReducedVals.push_back(Ops);
@@ -27947,7 +28171,7 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
return TryToReduce(I, {Op0, Op1}) || tryToVectorizeList({Op0, Op1}, R);
// We have multiple options. Try to pick the single best.
- std::optional<int> BestCandidate = R.findBestRootPair(Candidates);
+ std::optional<int> BestCandidate = R.findBestRootPair(Candidates).first;
if (!BestCandidate)
return false;
return (*BestCandidate == 0 &&
@@ -28872,7 +29096,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
NewVL.back() = V1->getValueOperand();
InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
InstructionsState S = Analysis.buildInstructionsState(
- NewVL, R, VectorizeCopyableElements, /*WithProfitabilityCheck=*/true,
+ NewVL, R, /*WithProfitabilityCheck=*/true,
/*SkipSameCodeCheck=*/!SameParent);
if (S)
return true;
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/scalarize-load-ext-extract.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/scalarize-load-ext-extract.ll
index 5bdc2327379b2..8210fd4113013 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/scalarize-load-ext-extract.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/scalarize-load-ext-extract.ll
@@ -8,10 +8,8 @@ define noundef i32 @load_ext_extract(ptr %src) {
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP14]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 0, i32 8, i32 16, i32 0>
-; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 255, i32 255, i32 255, i32 poison>
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP2]], <i32 255, i32 255, i32 255, i32 24>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 0, i32 8, i32 16, i32 24>
+; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP2]], <i32 255, i32 255, i32 255, i32 -1>
; CHECK-NEXT: [[ADD3:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: ret i32 [[ADD3]]
;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/avg.ll b/llvm/test/Transforms/PhaseOrdering/X86/avg.ll
index a7393fbf2e917..d3e64f93441bc 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/avg.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/avg.ll
@@ -39,10 +39,10 @@ define { i64, i64 } @avgr_16_u8(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE2-NEXT: [[B_SROA_16_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE1]], 48
; SSE2-NEXT: [[TMP16:%.*]] = and <2 x i64> [[TMP2]], splat (i64 255)
; SSE2-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP10]], splat (i64 255)
-; SSE2-NEXT: [[CONV1_14:%.*]] = and i64 [[A_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE2-NEXT: [[TMP18:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
; SSE2-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> [[TMP18]], i16 [[TMP7]], i64 1
; SSE2-NEXT: [[TMP20:%.*]] = lshr <2 x i16> [[TMP19]], splat (i16 8)
+; SSE2-NEXT: [[B_SROA_8_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 56
; SSE2-NEXT: [[TMP21:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i64 0
; SSE2-NEXT: [[TMP22:%.*]] = insertelement <2 x i16> [[TMP21]], i16 [[TMP15]], i64 1
; SSE2-NEXT: [[TMP23:%.*]] = lshr <2 x i16> [[TMP22]], splat (i16 8)
@@ -71,10 +71,10 @@ define { i64, i64 } @avgr_16_u8(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE2-NEXT: [[TMP42:%.*]] = and <2 x i64> [[TMP14]], splat (i64 255)
; SSE2-NEXT: [[TMP43:%.*]] = add nuw nsw <2 x i64> [[TMP41]], splat (i64 1)
; SSE2-NEXT: [[TMP44:%.*]] = add nuw nsw <2 x i64> [[TMP43]], [[TMP42]]
+; SSE2-NEXT: [[CONV1_14:%.*]] = and i64 [[A_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE2-NEXT: [[CONV4_14:%.*]] = and i64 [[B_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE2-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[A_SROA_8_0_EXTRACT_SHIFT]], 1
; SSE2-NEXT: [[ADD_14:%.*]] = add nuw nsw i64 [[CONV1_14]], 1
-; SSE2-NEXT: [[B_SROA_8_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 56
; SSE2-NEXT: [[ADD5_14:%.*]] = add nuw nsw i64 [[ADD_14]], [[CONV4_14]]
; SSE2-NEXT: [[ADD5_7:%.*]] = add nuw nsw i64 [[ADD_7]], [[B_SROA_8_0_EXTRACT_SHIFT]]
; SSE2-NEXT: [[ADD_15:%.*]] = add nuw nsw i64 [[A_SROA_17_8_EXTRACT_SHIFT]], 1
@@ -141,10 +141,10 @@ define { i64, i64 } @avgr_16_u8(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE4-NEXT: [[B_SROA_16_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE1]], 48
; SSE4-NEXT: [[TMP16:%.*]] = and <2 x i64> [[TMP2]], splat (i64 255)
; SSE4-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP10]], splat (i64 255)
-; SSE4-NEXT: [[CONV1_14:%.*]] = and i64 [[A_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE4-NEXT: [[TMP18:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
; SSE4-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> [[TMP18]], i16 [[TMP7]], i64 1
; SSE4-NEXT: [[TMP20:%.*]] = lshr <2 x i16> [[TMP19]], splat (i16 8)
+; SSE4-NEXT: [[B_SROA_8_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 56
; SSE4-NEXT: [[TMP21:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i64 0
; SSE4-NEXT: [[TMP22:%.*]] = insertelement <2 x i16> [[TMP21]], i16 [[TMP15]], i64 1
; SSE4-NEXT: [[TMP23:%.*]] = lshr <2 x i16> [[TMP22]], splat (i16 8)
@@ -173,10 +173,10 @@ define { i64, i64 } @avgr_16_u8(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE4-NEXT: [[TMP42:%.*]] = and <2 x i64> [[TMP14]], splat (i64 255)
; SSE4-NEXT: [[TMP43:%.*]] = add nuw nsw <2 x i64> [[TMP41]], splat (i64 1)
; SSE4-NEXT: [[TMP44:%.*]] = add nuw nsw <2 x i64> [[TMP43]], [[TMP42]]
+; SSE4-NEXT: [[CONV1_14:%.*]] = and i64 [[A_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE4-NEXT: [[CONV4_14:%.*]] = and i64 [[B_SROA_16_8_EXTRACT_SHIFT]], 255
; SSE4-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[A_SROA_8_0_EXTRACT_SHIFT]], 1
; SSE4-NEXT: [[ADD_14:%.*]] = add nuw nsw i64 [[CONV1_14]], 1
-; SSE4-NEXT: [[B_SROA_8_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 56
; SSE4-NEXT: [[ADD5_14:%.*]] = add nuw nsw i64 [[ADD_14]], [[CONV4_14]]
; SSE4-NEXT: [[ADD5_7:%.*]] = add nuw nsw i64 [[ADD_7]], [[B_SROA_8_0_EXTRACT_SHIFT]]
; SSE4-NEXT: [[ADD_15:%.*]] = add nuw nsw i64 [[A_SROA_17_8_EXTRACT_SHIFT]], 1
@@ -731,69 +731,64 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE2-LABEL: @avgr_8_u16(
; SSE2-NEXT: entry:
; SSE2-NEXT: [[TMP0:%.*]] = trunc i64 [[A_COERCE0:%.*]] to i32
-; SSE2-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 16
; SSE2-NEXT: [[A_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE0]], 32
; SSE2-NEXT: [[A_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE0]], 48
; SSE2-NEXT: [[TMP2:%.*]] = trunc i64 [[A_COERCE1:%.*]] to i32
-; SSE2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP2]], 16
; SSE2-NEXT: [[A_SROA_8_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE1]], 32
-; SSE2-NEXT: [[A_SROA_9_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE1]], 48
-; SSE2-NEXT: [[TMP4:%.*]] = trunc i64 [[B_COERCE0:%.*]] to i32
-; SSE2-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP4]], 16
+; SSE2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[B_COERCE0:%.*]], i64 0
+; SSE2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[B_COERCE1:%.*]], i64 1
+; SSE2-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
; SSE2-NEXT: [[B_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 32
-; SSE2-NEXT: [[B_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 48
-; SSE2-NEXT: [[TMP6:%.*]] = trunc i64 [[B_COERCE1:%.*]] to i32
-; SSE2-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; SSE2-NEXT: [[B_SROA_8_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE1]], 32
+; SSE2-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[A_COERCE0]], i64 0
+; SSE2-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[A_COERCE1]], i64 1
+; SSE2-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP6]], splat (i64 65535)
+; SSE2-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP3]], splat (i64 65535)
+; SSE2-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
+; SSE2-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP2]], i64 1
+; SSE2-NEXT: [[TMP11:%.*]] = lshr <2 x i32> [[TMP10]], splat (i32 16)
+; SSE2-NEXT: [[CONV2_4:%.*]] = lshr i64 [[B_COERCE0]], 48
+; SSE2-NEXT: [[TMP20:%.*]] = lshr <2 x i32> [[TMP4]], splat (i32 16)
+; SSE2-NEXT: [[CONV_6:%.*]] = lshr i64 [[A_COERCE1]], 48
+; SSE2-NEXT: [[A_SROA_9_8_EXTRACT_SHIFT:%.*]] = and i64 [[A_SROA_3_0_EXTRACT_SHIFT]], 65535
; SSE2-NEXT: [[B_SROA_9_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE1]], 48
-; SSE2-NEXT: [[CONV:%.*]] = and i64 [[A_COERCE0]], 65535
-; SSE2-NEXT: [[CONV2:%.*]] = and i64 [[B_COERCE0]], 65535
-; SSE2-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV]], 1
-; SSE2-NEXT: [[ADD3:%.*]] = add nuw nsw i64 [[ADD]], [[CONV2]]
-; SSE2-NEXT: [[SHR:%.*]] = lshr i64 [[ADD3]], 1
-; SSE2-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; SSE2-NEXT: [[ADD3_1:%.*]] = add nuw nsw i32 [[ADD_1]], [[TMP5]]
-; SSE2-NEXT: [[CONV_2:%.*]] = and i64 [[A_SROA_3_0_EXTRACT_SHIFT]], 65535
; SSE2-NEXT: [[CONV2_2:%.*]] = and i64 [[B_SROA_3_0_EXTRACT_SHIFT]], 65535
-; SSE2-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[CONV_2]], 1
-; SSE2-NEXT: [[ADD3_2:%.*]] = add nuw nsw i64 [[ADD_2]], [[CONV2_2]]
-; SSE2-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[A_SROA_4_0_EXTRACT_SHIFT]], 1
+; SSE2-NEXT: [[TMP31:%.*]] = add nuw nsw <2 x i64> [[TMP7]], splat (i64 1)
+; SSE2-NEXT: [[TMP14:%.*]] = add nuw nsw <2 x i64> [[TMP31]], [[TMP8]]
+; SSE2-NEXT: [[TMP15:%.*]] = lshr <2 x i64> [[TMP14]], splat (i64 1)
+; SSE2-NEXT: [[TMP16:%.*]] = add nuw nsw <2 x i32> [[TMP11]], splat (i32 1)
+; SSE2-NEXT: [[TMP17:%.*]] = add nuw nsw <2 x i32> [[TMP16]], [[TMP20]]
+; SSE2-NEXT: [[CONV_7:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
+; SSE2-NEXT: [[B_SROA_4_0_EXTRACT_SHIFT:%.*]] = and i64 [[B_SROA_8_8_EXTRACT_SHIFT]], 65535
+; SSE2-NEXT: [[ADD_4:%.*]] = add nuw nsw i64 [[A_SROA_4_0_EXTRACT_SHIFT]], 1
+; SSE2-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[CONV_7]], 1
; SSE2-NEXT: [[ADD3_3:%.*]] = add nuw nsw i64 [[ADD_3]], [[B_SROA_4_0_EXTRACT_SHIFT]]
-; SSE2-NEXT: [[CONV_4:%.*]] = and i64 [[A_COERCE1]], 65535
-; SSE2-NEXT: [[CONV2_4:%.*]] = and i64 [[B_COERCE1]], 65535
-; SSE2-NEXT: [[ADD_4:%.*]] = add nuw nsw i64 [[CONV_4]], 1
; SSE2-NEXT: [[ADD3_4:%.*]] = add nuw nsw i64 [[ADD_4]], [[CONV2_4]]
-; SSE2-NEXT: [[SHR_4:%.*]] = lshr i64 [[ADD3_4]], 1
-; SSE2-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[TMP3]], 1
-; SSE2-NEXT: [[ADD3_5:%.*]] = add nuw nsw i32 [[ADD_5]], [[TMP7]]
-; SSE2-NEXT: [[CONV_6:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
-; SSE2-NEXT: [[CONV2_6:%.*]] = and i64 [[B_SROA_8_8_EXTRACT_SHIFT]], 65535
; SSE2-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[CONV_6]], 1
-; SSE2-NEXT: [[ADD3_6:%.*]] = add nuw nsw i64 [[ADD_6]], [[CONV2_6]]
; SSE2-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[A_SROA_9_8_EXTRACT_SHIFT]], 1
-; SSE2-NEXT: [[ADD3_7:%.*]] = add nuw nsw i64 [[ADD_7]], [[B_SROA_9_8_EXTRACT_SHIFT]]
-; SSE2-NEXT: [[TMP8:%.*]] = shl nuw i64 [[ADD3_3]], 47
-; SSE2-NEXT: [[RETVAL_SROA_4_0_INSERT_EXT:%.*]] = and i64 [[TMP8]], -281474976710656
-; SSE2-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[ADD3_2]], 31
-; SSE2-NEXT: [[RETVAL_SROA_3_0_INSERT_SHIFT:%.*]] = and i64 [[TMP9]], 281470681743360
-; SSE2-NEXT: [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_4_0_INSERT_EXT]], [[RETVAL_SROA_3_0_INSERT_SHIFT]]
-; SSE2-NEXT: [[TMP10:%.*]] = shl nuw i32 [[ADD3_1]], 15
-; SSE2-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], -65536
-; SSE2-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = zext i32 [[TMP11]] to i64
-; SSE2-NEXT: [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], [[RETVAL_SROA_2_0_INSERT_SHIFT]]
-; SSE2-NEXT: [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_2_0_INSERT_INSERT]], [[SHR]]
-; SSE2-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[RETVAL_SROA_0_0_INSERT_INSERT]], 0
+; SSE2-NEXT: [[ADD3_7:%.*]] = add nuw nsw i64 [[ADD_6]], [[B_SROA_9_8_EXTRACT_SHIFT]]
+; SSE2-NEXT: [[ADD3_2:%.*]] = add nuw nsw i64 [[ADD_7]], [[CONV2_2]]
; SSE2-NEXT: [[TMP12:%.*]] = shl nuw i64 [[ADD3_7]], 47
+; SSE2-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[ADD3_2]], 31
; SSE2-NEXT: [[RETVAL_SROA_9_8_INSERT_EXT:%.*]] = and i64 [[TMP12]], -281474976710656
-; SSE2-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[ADD3_6]], 31
+; SSE2-NEXT: [[SHR_4:%.*]] = and i64 [[TMP9]], 281470681743360
+; SSE2-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[ADD3_3]], 31
+; SSE2-NEXT: [[TMP21:%.*]] = shl nuw i64 [[ADD3_4]], 47
; SSE2-NEXT: [[RETVAL_SROA_8_8_INSERT_SHIFT:%.*]] = and i64 [[TMP13]], 281470681743360
+; SSE2-NEXT: [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = and i64 [[TMP21]], -281474976710656
; SSE2-NEXT: [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_9_8_INSERT_EXT]], [[RETVAL_SROA_8_8_INSERT_SHIFT]]
-; SSE2-NEXT: [[TMP14:%.*]] = shl nuw i32 [[ADD3_5]], 15
-; SSE2-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], -65536
-; SSE2-NEXT: [[RETVAL_SROA_7_8_INSERT_SHIFT:%.*]] = zext i32 [[TMP15]] to i64
-; SSE2-NEXT: [[RETVAL_SROA_7_8_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], [[RETVAL_SROA_7_8_INSERT_SHIFT]]
; SSE2-NEXT: [[RETVAL_SROA_5_8_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_7_8_INSERT_INSERT]], [[SHR_4]]
-; SSE2-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], 1
+; SSE2-NEXT: [[TMP22:%.*]] = shl nuw <2 x i32> [[TMP17]], splat (i32 15)
+; SSE2-NEXT: [[TMP23:%.*]] = and <2 x i32> [[TMP22]], splat (i32 -65536)
+; SSE2-NEXT: [[TMP24:%.*]] = zext <2 x i32> [[TMP23]] to <2 x i64>
+; SSE2-NEXT: [[TMP25:%.*]] = insertelement <2 x i64> poison, i64 [[RETVAL_SROA_5_8_INSERT_INSERT]], i64 0
+; SSE2-NEXT: [[TMP26:%.*]] = insertelement <2 x i64> [[TMP25]], i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], i64 1
+; SSE2-NEXT: [[TMP27:%.*]] = or disjoint <2 x i64> [[TMP26]], [[TMP24]]
+; SSE2-NEXT: [[TMP28:%.*]] = or disjoint <2 x i64> [[TMP27]], [[TMP15]]
+; SSE2-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[TMP28]], i64 0
+; SSE2-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { i64, i64 } poison, i64 [[TMP29]], 0
+; SSE2-NEXT: [[TMP30:%.*]] = extractelement <2 x i64> [[TMP28]], i64 1
+; SSE2-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { i64, i64 } [[DOTFCA_0_INSERT]], i64 [[TMP30]], 1
; SSE2-NEXT: ret { i64, i64 } [[DOTFCA_1_INSERT]]
;
; SSE4-LABEL: @avgr_8_u16(
@@ -812,10 +807,10 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE4-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[A_COERCE1]], i64 1
; SSE4-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP6]], splat (i64 65535)
; SSE4-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP3]], splat (i64 65535)
-; SSE4-NEXT: [[CONV_6:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
; SSE4-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; SSE4-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP1]], i64 1
; SSE4-NEXT: [[TMP11:%.*]] = lshr <2 x i32> [[TMP10]], splat (i32 16)
+; SSE4-NEXT: [[B_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 48
; SSE4-NEXT: [[TMP12:%.*]] = lshr <2 x i32> [[TMP4]], splat (i32 16)
; SSE4-NEXT: [[A_SROA_9_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE1]], 48
; SSE4-NEXT: [[CONV_2:%.*]] = and i64 [[A_SROA_3_0_EXTRACT_SHIFT]], 65535
@@ -826,10 +821,10 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; SSE4-NEXT: [[TMP15:%.*]] = lshr <2 x i64> [[TMP14]], splat (i64 1)
; SSE4-NEXT: [[TMP16:%.*]] = add nuw nsw <2 x i32> [[TMP11]], splat (i32 1)
; SSE4-NEXT: [[TMP17:%.*]] = add nuw nsw <2 x i32> [[TMP16]], [[TMP12]]
+; SSE4-NEXT: [[CONV_6:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
; SSE4-NEXT: [[CONV2_6:%.*]] = and i64 [[B_SROA_8_8_EXTRACT_SHIFT]], 65535
; SSE4-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[A_SROA_4_0_EXTRACT_SHIFT]], 1
; SSE4-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[CONV_6]], 1
-; SSE4-NEXT: [[B_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 48
; SSE4-NEXT: [[ADD3_6:%.*]] = add nuw nsw i64 [[ADD_6]], [[CONV2_6]]
; SSE4-NEXT: [[ADD3_3:%.*]] = add nuw nsw i64 [[ADD_3]], [[B_SROA_4_0_EXTRACT_SHIFT]]
; SSE4-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[A_SROA_9_8_EXTRACT_SHIFT]], 1
@@ -863,19 +858,19 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; AVX2-NEXT: entry:
; AVX2-NEXT: [[TMP0:%.*]] = trunc i64 [[A_COERCE0:%.*]] to i32
; AVX2-NEXT: [[A_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE0]], 32
-; AVX2-NEXT: [[A_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE0]], 48
+; AVX2-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[A_COERCE0]], i64 0
+; AVX2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[B_COERCE1:%.*]], i64 1
+; AVX2-NEXT: [[TMP19:%.*]] = lshr <2 x i64> [[TMP18]], <i64 48, i64 32>
; AVX2-NEXT: [[TMP1:%.*]] = trunc i64 [[A_COERCE1:%.*]] to i32
-; AVX2-NEXT: [[A_SROA_8_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE1]], 32
; AVX2-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[B_COERCE0:%.*]], i64 0
-; AVX2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[B_COERCE1:%.*]], i64 1
+; AVX2-NEXT: [[TMP20:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[A_COERCE1]], i64 1
+; AVX2-NEXT: [[TMP21:%.*]] = lshr <2 x i64> [[TMP20]], <i64 48, i64 32>
+; AVX2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[B_COERCE0]], i64 0
; AVX2-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
; AVX2-NEXT: [[B_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 32
-; AVX2-NEXT: [[B_SROA_8_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE1]], 32
-; AVX2-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[A_COERCE0]], i64 0
-; AVX2-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[A_COERCE1]], i64 1
+; AVX2-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP20]], i64 [[A_COERCE0]], i64 0
; AVX2-NEXT: [[TMP7:%.*]] = and <2 x i64> [[TMP6]], splat (i64 65535)
; AVX2-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP3]], splat (i64 65535)
-; AVX2-NEXT: [[CONV_6:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
; AVX2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; AVX2-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP1]], i64 1
; AVX2-NEXT: [[TMP11:%.*]] = lshr <2 x i32> [[TMP10]], splat (i32 16)
@@ -889,31 +884,26 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; AVX2-NEXT: [[TMP15:%.*]] = lshr <2 x i64> [[TMP14]], splat (i64 1)
; AVX2-NEXT: [[TMP16:%.*]] = add nuw nsw <2 x i32> [[TMP11]], splat (i32 1)
; AVX2-NEXT: [[TMP17:%.*]] = add nuw nsw <2 x i32> [[TMP16]], [[TMP12]]
-; AVX2-NEXT: [[CONV2_6:%.*]] = and i64 [[B_SROA_8_8_EXTRACT_SHIFT]], 65535
-; AVX2-NEXT: [[ADD_3:%.*]] = add nuw nsw i64 [[A_SROA_4_0_EXTRACT_SHIFT]], 1
-; AVX2-NEXT: [[ADD_6:%.*]] = add nuw nsw i64 [[CONV_6]], 1
-; AVX2-NEXT: [[B_SROA_4_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[B_COERCE0]], 48
-; AVX2-NEXT: [[ADD3_6:%.*]] = add nuw nsw i64 [[ADD_6]], [[CONV2_6]]
-; AVX2-NEXT: [[ADD3_3:%.*]] = add nuw nsw i64 [[ADD_3]], [[B_SROA_4_0_EXTRACT_SHIFT]]
+; AVX2-NEXT: [[TMP34:%.*]] = and <2 x i64> [[TMP21]], <i64 -1, i64 65535>
+; AVX2-NEXT: [[TMP35:%.*]] = add nuw nsw <2 x i64> [[TMP19]], <i64 1, i64 poison>
+; AVX2-NEXT: [[TMP36:%.*]] = and <2 x i64> [[TMP19]], <i64 poison, i64 65535>
+; AVX2-NEXT: [[TMP25:%.*]] = shufflevector <2 x i64> [[TMP35]], <2 x i64> [[TMP36]], <2 x i32> <i32 0, i32 3>
+; AVX2-NEXT: [[TMP37:%.*]] = add nuw nsw <2 x i64> [[TMP34]], <i64 0, i64 1>
+; AVX2-NEXT: [[TMP38:%.*]] = add nuw nsw <2 x i64> [[TMP25]], [[TMP37]]
; AVX2-NEXT: [[ADD_7:%.*]] = add nuw nsw i64 [[A_SROA_9_8_EXTRACT_SHIFT]], 1
; AVX2-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[CONV_2]], 1
; AVX2-NEXT: [[ADD3_7:%.*]] = add nuw nsw i64 [[ADD_7]], [[B_SROA_9_8_EXTRACT_SHIFT]]
; AVX2-NEXT: [[ADD3_2:%.*]] = add nuw nsw i64 [[ADD_2]], [[CONV2_2]]
-; AVX2-NEXT: [[TMP18:%.*]] = shl nuw i64 [[ADD3_7]], 47
-; AVX2-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[ADD3_2]], 31
-; AVX2-NEXT: [[RETVAL_SROA_9_8_INSERT_EXT:%.*]] = and i64 [[TMP18]], -281474976710656
-; AVX2-NEXT: [[RETVAL_SROA_3_0_INSERT_SHIFT:%.*]] = and i64 [[TMP19]], 281470681743360
-; AVX2-NEXT: [[TMP20:%.*]] = shl nuw nsw i64 [[ADD3_6]], 31
-; AVX2-NEXT: [[TMP21:%.*]] = shl nuw i64 [[ADD3_3]], 47
-; AVX2-NEXT: [[RETVAL_SROA_8_8_INSERT_SHIFT:%.*]] = and i64 [[TMP20]], 281470681743360
-; AVX2-NEXT: [[RETVAL_SROA_4_0_INSERT_EXT:%.*]] = and i64 [[TMP21]], -281474976710656
-; AVX2-NEXT: [[RETVAL_SROA_8_8_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_9_8_INSERT_EXT]], [[RETVAL_SROA_8_8_INSERT_SHIFT]]
-; AVX2-NEXT: [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or disjoint i64 [[RETVAL_SROA_4_0_INSERT_EXT]], [[RETVAL_SROA_3_0_INSERT_SHIFT]]
+; AVX2-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[ADD3_2]], i64 0
+; AVX2-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[ADD3_7]], i64 1
+; AVX2-NEXT: [[TMP41:%.*]] = shl nuw <2 x i64> [[TMP40]], <i64 31, i64 47>
+; AVX2-NEXT: [[TMP31:%.*]] = and <2 x i64> [[TMP41]], <i64 281470681743360, i64 -281474976710656>
+; AVX2-NEXT: [[TMP32:%.*]] = shl nuw <2 x i64> [[TMP38]], <i64 47, i64 31>
+; AVX2-NEXT: [[TMP33:%.*]] = and <2 x i64> [[TMP32]], <i64 -281474976710656, i64 281470681743360>
+; AVX2-NEXT: [[TMP26:%.*]] = or disjoint <2 x i64> [[TMP31]], [[TMP33]]
; AVX2-NEXT: [[TMP22:%.*]] = shl nuw <2 x i32> [[TMP17]], splat (i32 15)
; AVX2-NEXT: [[TMP23:%.*]] = and <2 x i32> [[TMP22]], splat (i32 -65536)
; AVX2-NEXT: [[TMP24:%.*]] = zext <2 x i32> [[TMP23]] to <2 x i64>
-; AVX2-NEXT: [[TMP25:%.*]] = insertelement <2 x i64> poison, i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], i64 0
-; AVX2-NEXT: [[TMP26:%.*]] = insertelement <2 x i64> [[TMP25]], i64 [[RETVAL_SROA_8_8_INSERT_INSERT]], i64 1
; AVX2-NEXT: [[TMP27:%.*]] = or disjoint <2 x i64> [[TMP26]], [[TMP24]]
; AVX2-NEXT: [[TMP28:%.*]] = or disjoint <2 x i64> [[TMP27]], [[TMP15]]
; AVX2-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[TMP28]], i64 0
@@ -929,13 +919,14 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; AVX512-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[B_COERCE1:%.*]], i64 1
; AVX512-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP2]], <i64 48, i64 32>
; AVX512-NEXT: [[TMP4:%.*]] = trunc i64 [[A_COERCE1:%.*]] to i32
-; AVX512-NEXT: [[A_SROA_8_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[A_COERCE1]], 32
-; AVX512-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[B_COERCE0:%.*]], i64 0
+; AVX512-NEXT: [[TMP31:%.*]] = insertelement <2 x i64> poison, i64 [[B_COERCE0:%.*]], i64 0
+; AVX512-NEXT: [[TMP32:%.*]] = insertelement <2 x i64> [[TMP31]], i64 [[A_COERCE1]], i64 1
+; AVX512-NEXT: [[TMP49:%.*]] = lshr <2 x i64> [[TMP32]], <i64 48, i64 32>
+; AVX512-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[B_COERCE0]], i64 0
; AVX512-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i32>
-; AVX512-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[A_COERCE1]], i64 1
+; AVX512-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP32]], i64 [[A_COERCE0]], i64 0
; AVX512-NEXT: [[TMP8:%.*]] = and <2 x i64> [[TMP7]], splat (i64 65535)
; AVX512-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP5]], splat (i64 65535)
-; AVX512-NEXT: [[CONV_6:%.*]] = and i64 [[A_SROA_8_8_EXTRACT_SHIFT]], 65535
; AVX512-NEXT: [[TMP10:%.*]] = lshr <2 x i64> [[TMP7]], <i64 32, i64 0>
; AVX512-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
; AVX512-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP4]], i64 1
@@ -953,13 +944,11 @@ define { i64, i64 } @avgr_8_u16(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0,
; AVX512-NEXT: [[TMP24:%.*]] = lshr <2 x i64> [[TMP23]], splat (i64 1)
; AVX512-NEXT: [[TMP25:%.*]] = add nuw nsw <2 x i32> [[TMP13]], splat (i32 1)
; AVX512-NEXT: [[TMP26:%.*]] = add nuw nsw <2 x i32> [[TMP25]], [[TMP15]]
+; AVX512-NEXT: [[TMP30:%.*]] = and <2 x i64> [[TMP49]], <i64 -1, i64 65535>
; AVX512-NEXT: [[TMP27:%.*]] = add nuw nsw <2 x i64> [[TMP3]], <i64 1, i64 poison>
; AVX512-NEXT: [[TMP28:%.*]] = and <2 x i64> [[TMP3]], <i64 poison, i64 65535>
; AVX512-NEXT: [[TMP29:%.*]] = shufflevector <2 x i64> [[TMP27]], <2 x i64> [[TMP28]], <2 x i32> <i32 0, i32 3>
-; AVX512-NEXT: [[TMP30:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[CONV_6]], i64 1
-; AVX512-NEXT: [[TMP31:%.*]] = lshr <2 x i64> [[TMP30]], <i64 48, i64 1>
-; AVX512-NEXT: [[TMP32:%.*]] = add nuw nsw <2 x i64> [[TMP30]], <i64 poison, i64 1>
-; AVX512-NEXT: [[TMP33:%.*]] = shufflevector <2 x i64> [[TMP31]], <2 x i64> [[TMP32]], <2 x i32> <i32 0, i32 3>
+; AVX512-NEXT: [[TMP33:%.*]] = add nuw nsw <2 x i64> [[TMP30]], <i64 0, i64 1>
; AVX512-NEXT: [[TMP34:%.*]] = add nuw nsw <2 x i64> [[TMP29]], [[TMP33]]
; AVX512-NEXT: [[TMP35:%.*]] = add nuw nsw <2 x i64> [[TMP18]], splat (i64 1)
; AVX512-NEXT: [[TMP36:%.*]] = add nuw nsw <2 x i64> [[TMP35]], [[TMP21]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
index 77a1c812c52a0..3b4c8b53e35c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
@@ -9,44 +9,39 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> <i64 48, i64 40, i64 24, i64 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = mul <4 x i64> [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP0]], 11
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> <i64 poison, i64 1, i64 poison, i64 1>, <4 x i32> <i32 0, i32 5, i32 poison, i32 7>
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> poison, i64 [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP15]], <4 x i64> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i64> [[TMP21]], <4 x i64> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP22]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 6>
-; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i64> [[TMP32]], <4 x i64> <i64 -1, i64 poison, i64 -8, i64 poison>, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 0, i64 11>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP15]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i64> <i64 poison, i64 1, i64 poison, i64 1>, <4 x i64> [[TMP16]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i64> [[TMP22]], <4 x i64> <i64 -1, i64 poison, i64 -8, i64 poison>, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i64> [[TMP22]], [[TMP33]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP18:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[TMP0]]
-; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i64> [[TMP22]], <4 x i64> <i64 poison, i64 poison, i64 poison, i64 1>, <4 x i32> <i32 2, i32 0, i32 poison, i32 7>
-; CHECK-NEXT: [[TMP81:%.*]] = insertelement <4 x i64> poison, i64 [[TMP19]], i32 0
-; CHECK-NEXT: [[TMP82:%.*]] = shufflevector <4 x i64> [[TMP81]], <4 x i64> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP23]], <4 x i64> [[TMP82]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> <i64 -8, i64 1, i64 1, i64 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
-; CHECK-NEXT: [[TMP38:%.*]] = sub <4 x i64> [[TMP17]], [[TMP37]]
-; CHECK-NEXT: [[TMP29:%.*]] = or <4 x i64> [[TMP17]], [[TMP37]]
-; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <4 x i64> [[TMP38]], <4 x i64> [[TMP29]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP19]], 1
+; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> <i64 poison, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <64 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 1, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison>, i64 [[TMP1]], i32 11
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> poison, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <14 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i64> [[TMP45]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <2 x i64> [[TMP85]], <2 x i64> <i64 poison, i64 1>, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP87:%.*]] = mul <2 x i64> [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP88:%.*]] = or <2 x i64> [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x i64> [[TMP87]], <2 x i64> [[TMP88]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <2 x i64> [[TMP89]], <2 x i64> poison, <64 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <14 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <2 x i64> [[TMP14]], <2 x i64> <i64 poison, i64 1>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP83:%.*]] = mul <2 x i64> [[TMP14]], [[TMP45]]
+; CHECK-NEXT: [[TMP84:%.*]] = or <2 x i64> [[TMP14]], [[TMP45]]
+; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i64> [[TMP83]], <2 x i64> [[TMP84]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <2 x i64> [[TMP85]], <2 x i64> poison, <64 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i64> [[TMP20]], <4 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 3>
; CHECK-NEXT: br label %[[DOTLR_PH1977_US:.*]]
; CHECK: [[_LR_PH1977_US:.*:]]
; CHECK-NEXT: [[INDVAR37888:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER:%.*]] ], [ 1, %[[DOTLR_PH1977_US]] ]
-; CHECK-NEXT: [[TMP34:%.*]] = mul <4 x i64> [[TMP83]], [[TMP31]]
+; CHECK-NEXT: [[TMP81:%.*]] = insertelement <4 x i64> [[TMP32]], i64 [[TMP21]], i32 1
+; CHECK-NEXT: [[TMP82:%.*]] = insertelement <4 x i64> [[TMP81]], i64 [[TMP23]], i32 2
+; CHECK-NEXT: [[TMP34:%.*]] = mul <4 x i64> [[TMP82]], [[TMP10]]
; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3, i32 3>
-; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP31]]
+; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP10]]
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = or <2 x i64> [[TMP42]], splat (i64 1)
; CHECK-NEXT: [[TMP41:%.*]] = shl <2 x i64> [[TMP42]], splat (i64 1)
@@ -71,7 +66,7 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <64 x i64> [[TMP60]], <64 x i64> [[TMP50]], <28 x i32> <i32 0, i32 65, i32 66, i32 67, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 75, i32 21, i32 77, i32 26, i32 27, i32 1, i32 81, i32 8, i32 9, i32 22, i32 10, i32 2, i32 3, i32 11, i32 23, i32 90, i32 91>
; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <28 x i64> [[TMP61]], <28 x i64> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 11, i32 16, i32 1, i32 17, i32 18, i32 19, i32 20, i32 11, i32 21, i32 13, i32 11, i32 11, i32 13, i32 11, i32 14, i32 15, i32 22, i32 11, i32 1, i32 17, i32 23, i32 24, i32 11, i32 11, i32 21, i32 13, i32 11, i32 11, i32 13, i32 11, i32 15, i32 22, i32 1, i32 17, i32 25, i32 21, i32 21, i32 21, i32 11, i32 11, i32 26, i32 13, i32 14, i32 11, i32 1, i32 17, i32 11, i32 27>
; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <28 x i64> [[TMP61]], <28 x i64> poison, <14 x i32> <i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <14 x i64> [[TMP84]], <14 x i64> [[TMP63]], <14 x i32> <i32 poison, i32 1, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <14 x i64> [[TMP31]], <14 x i64> [[TMP63]], <14 x i32> <i32 poison, i32 0, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <14 x i64> [[TMP64]], i64 [[DOTNEG1]], i32 3
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <14 x i64> [[TMP65]], i64 [[TMP2]], i32 4
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <14 x i64> [[TMP66]], i64 [[TMP3]], i32 5
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll
index 8972b6aba694d..75c7cb6b644eb 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reordered-buildvector-scalars.ll
@@ -101,82 +101,83 @@ define fastcc i32 @test(i32 %0, i32 %add111.i.i, <4 x i32> %PredPel.i.sroa.86.72
; THRESH-NEXT: [[LOOPARRAY_SROA_24_0_I_I3:%.*]] = ashr i32 [[TMP0]], 1
; THRESH-NEXT: [[SHR143_5_I_I9:%.*]] = ashr i32 [[TMP0]], 1
; THRESH-NEXT: [[ADD1392_I:%.*]] = add i32 [[TMP0]], 1
-; THRESH-NEXT: [[PREDPEL_I_SROA_86_80_VEC_EXTRACT59312:%.*]] = extractelement <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT]], i64 0
-; THRESH-NEXT: [[ADD2235_I17:%.*]] = or i32 [[TMP0]], 1
-; THRESH-NEXT: [[ADD2323_I:%.*]] = add i32 [[TMP0]], 1
+; THRESH-NEXT: [[ADD2235_I16:%.*]] = or i32 [[TMP0]], 1
; THRESH-NEXT: [[ADD2190_I:%.*]] = or i32 [[SHR143_5_I_I9]], 1
; THRESH-NEXT: [[ADD2191_I:%.*]] = add i32 [[ADD2190_I]], [[TMP0]]
; THRESH-NEXT: [[CONV2193_I:%.*]] = trunc i32 [[ADD2191_I]] to i16
; THRESH-NEXT: store i16 [[CONV2193_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8232), align 8
-; THRESH-NEXT: [[ADD2203_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], 1
-; THRESH-NEXT: [[ADD2204_I:%.*]] = add i32 [[ADD2203_I]], [[TMP0]]
+; THRESH-NEXT: [[ADD2190_I2:%.*]] = or i32 [[ADD1392_I]], 1
+; THRESH-NEXT: [[ADD2190_I1:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], 1
+; THRESH-NEXT: [[SHR2237_I:%.*]] = add i32 [[ADD2190_I1]], [[TMP0]]
+; THRESH-NEXT: [[ADD2236_I:%.*]] = add i32 [[ADD2235_I16]], 1
+; THRESH-NEXT: [[SHR2343_I:%.*]] = lshr i32 [[ADD2236_I]], 1
; THRESH-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 poison, i32 0>
; THRESH-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[LOOPARRAY_SROA_24_0_I_I3]], i32 0
; THRESH-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1)
; THRESH-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[TMP4]], splat (i32 1)
; THRESH-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
-; THRESH-NEXT: [[CONV2206_I:%.*]] = trunc i32 [[ADD2204_I]] to i16
-; THRESH-NEXT: [[MUL1445_I:%.*]] = shl i32 [[TMP0]], 1
-; THRESH-NEXT: [[ADD2174_I:%.*]] = add i32 [[MUL1445_I]], 2
-; THRESH-NEXT: [[ADD2302_I1:%.*]] = add i32 [[TMP0]], 1
-; THRESH-NEXT: [[SHR2175_I:%.*]] = lshr i32 [[ADD2174_I]], 2
-; THRESH-NEXT: [[SHR2303_I1:%.*]] = lshr i32 [[ADD2302_I1]], 1
-; THRESH-NEXT: [[CONV2176_I:%.*]] = trunc i32 [[SHR2175_I]] to i16
-; THRESH-NEXT: [[CONV2304_I:%.*]] = trunc i32 [[SHR2303_I1]] to i16
-; THRESH-NEXT: store i16 [[CONV2176_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8186), align 2
-; THRESH-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[CONV2304_I]], i32 0
-; THRESH-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> [[TMP7]], i16 [[CONV2176_I]], i32 1
-; THRESH-NEXT: store <2 x i16> [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8224), align 8
-; THRESH-NEXT: store <2 x i16> [[TMP8]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8204), align 4
-; THRESH-NEXT: [[TMP9:%.*]] = insertelement <4 x i16> poison, i16 [[CONV2206_I]], i32 0
-; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; THRESH-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
-; THRESH-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> [[TMP11]], i16 [[CONV2304_I]], i32 3
-; THRESH-NEXT: store <4 x i16> [[TMP12]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8178), align 2
-; THRESH-NEXT: [[ADD2190_I1:%.*]] = or i32 [[ADD1392_I]], 1
-; THRESH-NEXT: [[ADD2236_I:%.*]] = or i32 [[ADD2323_I]], [[TMP0]]
-; THRESH-NEXT: [[ADD2235_I16:%.*]] = or i32 [[TMP0]], 1
-; THRESH-NEXT: [[ADD2258_I:%.*]] = add i32 [[SHR143_5_I_I9]], 1
-; THRESH-NEXT: [[ADD2302_I:%.*]] = add i32 [[ADD111_I_I]], 1
-; THRESH-NEXT: [[SHR2325_I:%.*]] = add i32 [[ADD2190_I1]], [[TMP0]]
-; THRESH-NEXT: [[SHR2237_I:%.*]] = lshr i32 [[ADD2236_I]], 1
-; THRESH-NEXT: [[SHR2343_I:%.*]] = add i32 [[ADD2235_I16]], [[TMP0]]
-; THRESH-NEXT: [[SHR2259_I:%.*]] = lshr i32 [[ADD2258_I]], 1
-; THRESH-NEXT: [[SHR2303_I:%.*]] = lshr i32 [[ADD2302_I]], 1
-; THRESH-NEXT: [[CONV2326_I:%.*]] = trunc i32 [[SHR2325_I]] to i16
; THRESH-NEXT: [[CONV2326_I1:%.*]] = trunc i32 [[SHR2237_I]] to i16
; THRESH-NEXT: [[CONV2344_I:%.*]] = trunc i32 [[SHR2343_I]] to i16
+; THRESH-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> poison, i16 [[CONV2344_I]], i32 0
+; THRESH-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> [[TMP7]], i16 [[CONV2326_I1]], i32 1
+; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; THRESH-NEXT: store <4 x i16> [[TMP10]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8176), align 8
+; THRESH-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[TMP0]], i32 0
+; THRESH-NEXT: [[TMP29:%.*]] = or <2 x i32> [[TMP11]], splat (i32 1)
+; THRESH-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <2 x i32> zeroinitializer
+; THRESH-NEXT: [[TMP31:%.*]] = add <2 x i32> [[TMP28]], [[TMP29]]
+; THRESH-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
+; THRESH-NEXT: [[TMP27:%.*]] = or <2 x i32> [[TMP31]], [[TMP23]]
+; THRESH-NEXT: [[ADD2342_I:%.*]] = add i32 [[SHR143_5_I_I9]], 1
+; THRESH-NEXT: [[ADD2136_I:%.*]] = or i32 [[LOOPARRAY_SROA_24_0_I_I3]], [[TMP0]]
+; THRESH-NEXT: [[ADD2302_I:%.*]] = add i32 [[TMP0]], 1
+; THRESH-NEXT: [[ADD2191_I1:%.*]] = add i32 [[ADD2190_I2]], [[TMP0]]
+; THRESH-NEXT: [[SHR2303_I1:%.*]] = lshr i32 [[ADD2302_I]], 1
+; THRESH-NEXT: [[CONV2304_I:%.*]] = trunc i32 [[SHR2303_I1]] to i16
+; THRESH-NEXT: [[CONV2193_I1:%.*]] = trunc i32 [[ADD2191_I1]] to i16
+; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i16> poison, i16 [[CONV2193_I1]], i32 0
+; THRESH-NEXT: [[TMP16:%.*]] = insertelement <2 x i16> [[TMP15]], i16 [[CONV2304_I]], i32 1
+; THRESH-NEXT: store <2 x i16> [[TMP16]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8222), align 2
+; THRESH-NEXT: store i16 [[CONV2304_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8204), align 4
+; THRESH-NEXT: [[TMP17:%.*]] = insertelement <2 x i16> poison, i16 [[CONV2304_I]], i32 0
+; THRESH-NEXT: [[MUL1445_I:%.*]] = shl i32 [[TMP0]], 1
+; THRESH-NEXT: [[ADD2258_I:%.*]] = or i32 [[ADD111_I_I]], [[TMP0]]
+; THRESH-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[PREDPEL_I_SROA_86_72_VEC_EXTRACT]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
+; THRESH-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[ADD111_I_I]], i32 1
+; THRESH-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[MUL1445_I]], i32 0
+; THRESH-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[ADD2258_I]], i32 3
+; THRESH-NEXT: [[TMP22:%.*]] = add <4 x i32> [[TMP21]], <i32 2, i32 1, i32 1, i32 0>
+; THRESH-NEXT: [[ADD2280_I:%.*]] = extractelement <4 x i32> [[TMP22]], i32 1
+; THRESH-NEXT: [[SHR2259_I:%.*]] = lshr i32 [[ADD2280_I]], 1
+; THRESH-NEXT: [[ADD2174_I:%.*]] = extractelement <4 x i32> [[TMP22]], i32 0
+; THRESH-NEXT: [[SHR2303_I:%.*]] = lshr i32 [[ADD2174_I]], 2
; THRESH-NEXT: [[CONV2344_I1:%.*]] = trunc i32 [[SHR2259_I]] to i16
; THRESH-NEXT: [[CONV2282_I:%.*]] = trunc i32 [[SHR2303_I]] to i16
-; THRESH-NEXT: store i16 [[CONV2282_I]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8228), align 4
-; THRESH-NEXT: [[ADD2236_I1:%.*]] = add i32 [[ADD2235_I17]], 1
-; THRESH-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[ADD111_I_I]], i32 0
-; THRESH-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[LOOPARRAY_SROA_24_0_I_I3]], i32 1
-; THRESH-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
-; THRESH-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> zeroinitializer
-; THRESH-NEXT: [[TMP17:%.*]] = or <2 x i32> [[TMP14]], [[TMP16]]
-; THRESH-NEXT: [[ADD2157_I:%.*]] = add i32 [[PREDPEL_I_SROA_86_80_VEC_EXTRACT59312]], 1
-; THRESH-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> poison, i32 [[ADD2157_I]], i32 0
-; THRESH-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP17]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; THRESH-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
-; THRESH-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[ADD2236_I1]], i32 3
-; THRESH-NEXT: [[TMP22:%.*]] = lshr <4 x i32> [[TMP21]], splat (i32 1)
-; THRESH-NEXT: [[TMP23:%.*]] = trunc <4 x i32> [[TMP22]] to <4 x i16>
-; THRESH-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[TMP23]], i32 0
+; THRESH-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> [[TMP25]], i32 [[ADD2136_I]], i32 4
+; THRESH-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[ADD2342_I]], i32 5
+; THRESH-NEXT: [[TMP44:%.*]] = shufflevector <2 x i32> [[TMP27]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT: [[TMP34:%.*]] = shufflevector <8 x i32> [[TMP33]], <8 x i32> [[TMP44]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; THRESH-NEXT: [[TMP41:%.*]] = lshr <8 x i32> [[TMP34]], <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 1>
+; THRESH-NEXT: [[TMP42:%.*]] = trunc <8 x i32> [[TMP41]] to <8 x i16>
+; THRESH-NEXT: [[TMP32:%.*]] = insertelement <2 x i16> [[TMP17]], i16 [[CONV2282_I]], i32 1
+; THRESH-NEXT: store <2 x i16> [[TMP32]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8184), align 8
+; THRESH-NEXT: [[TMP36:%.*]] = insertelement <2 x i16> poison, i16 [[CONV2282_I]], i32 0
+; THRESH-NEXT: [[TMP40:%.*]] = insertelement <2 x i16> [[TMP36]], i16 [[CONV2344_I1]], i32 1
+; THRESH-NEXT: store <2 x i16> [[TMP40]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8226), align 2
+; THRESH-NEXT: [[TMP35:%.*]] = extractelement <8 x i16> [[TMP42]], i32 4
+; THRESH-NEXT: store i16 [[TMP35]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8174), align 2
+; THRESH-NEXT: [[TMP24:%.*]] = extractelement <8 x i16> [[TMP42]], i32 2
; THRESH-NEXT: store i16 [[TMP24]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8230), align 2
-; THRESH-NEXT: store <4 x i16> [[TMP23]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8170), align 2
-; THRESH-NEXT: [[TMP25:%.*]] = insertelement <8 x i16> poison, i16 [[CONV2282_I]], i32 0
-; THRESH-NEXT: [[TMP26:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; THRESH-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP25]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
-; THRESH-NEXT: [[TMP28:%.*]] = insertelement <8 x i16> [[TMP27]], i16 [[CONV2206_I]], i32 5
-; THRESH-NEXT: [[TMP29:%.*]] = insertelement <8 x i16> [[TMP28]], i16 [[CONV2326_I1]], i32 6
-; THRESH-NEXT: [[TMP30:%.*]] = insertelement <8 x i16> [[TMP29]], i16 [[CONV2326_I]], i32 7
+; THRESH-NEXT: [[TMP37:%.*]] = shufflevector <8 x i16> [[TMP42]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; THRESH-NEXT: store <2 x i16> [[TMP37]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8170), align 2
+; THRESH-NEXT: [[TMP38:%.*]] = shufflevector <8 x i16> [[TMP42]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 7, i32 poison>
+; THRESH-NEXT: [[TMP39:%.*]] = insertelement <8 x i16> [[TMP38]], i16 [[CONV2344_I]], i32 4
+; THRESH-NEXT: [[TMP43:%.*]] = insertelement <8 x i16> [[TMP39]], i16 [[CONV2326_I1]], i32 5
+; THRESH-NEXT: [[TMP30:%.*]] = insertelement <8 x i16> [[TMP43]], i16 [[CONV2193_I1]], i32 7
; THRESH-NEXT: store <8 x i16> [[TMP30]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8188), align 4
-; THRESH-NEXT: [[TMP31:%.*]] = insertelement <8 x i16> [[TMP27]], i16 [[CONV2344_I1]], i32 4
-; THRESH-NEXT: [[TMP32:%.*]] = insertelement <8 x i16> [[TMP31]], i16 [[CONV2344_I]], i32 5
-; THRESH-NEXT: [[TMP33:%.*]] = insertelement <8 x i16> [[TMP32]], i16 [[CONV2326_I1]], i32 6
-; THRESH-NEXT: [[TMP34:%.*]] = insertelement <8 x i16> [[TMP33]], i16 [[CONV2326_I]], i32 7
-; THRESH-NEXT: store <8 x i16> [[TMP34]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8208), align 8
+; THRESH-NEXT: store <8 x i16> [[TMP42]], ptr getelementptr inbounds nuw (i8, ptr @images, i64 8206), align 2
; THRESH-NEXT: ret i32 0
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 299677ca80b34..54e5405e21c49 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -11,8 +11,8 @@ define void @test(ptr %0, ptr %1, ptr %2) {
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = sub <4 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
index 3c2472c2ab58d..f8522bc546e6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
@@ -4,15 +4,9 @@
define void @b() {
; CHECK-LABEL: @b(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[MUL:%.*]] = fmul float undef, 2.000000e+00
-; CHECK-NEXT: [[ADD:%.*]] = fadd float undef, 1.000000e+00
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float [[ADD]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[MUL]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = fneg <2 x float> [[TMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[MUL]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP9]], float [[ADD]], i32 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
index 21a93e57f6ec6..b5d9915b690c0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll
@@ -5,14 +5,15 @@ define i16 @test(i16 %v1, i16 %v2) {
; CHECK-LABEL: define i16 @test(
; CHECK-SAME: i16 [[V1:%.*]], i16 [[V2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 poison>, i16 [[V2]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> poison, i16 [[V1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[V2]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP5]], <4 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 poison>, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutable-node-with-non-sched-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutable-node-with-non-sched-parent.ll
index 5b6a0ab98a836..8807ca26767f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutable-node-with-non-sched-parent.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutable-node-with-non-sched-parent.ll
@@ -7,18 +7,20 @@ define void @test() {
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB9:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[SHL4:%.*]] = shl i32 0, 0
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[SHL4]], i32 1
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 -1, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>, <4 x i32> <i32 4, i32 5, i32 3, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[SHL4]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i32> <i32 0, i32 -1>, [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> <i32 0, i32 -1>, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; CHECK-NEXT: br label %[[BB6:.*]]
; CHECK: [[BB6]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ [[TMP5]], %[[BB1]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB1]] ]
; CHECK-NEXT: br label %[[BB9]]
; CHECK: [[BB9]]:
-; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ [[TMP6]], %[[BB6]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ [[TMP10]], %[[BB6]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP8]], 0
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index 1abc8102dc332..29b525d137f7f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -5,17 +5,17 @@ define i32 @bar() local_unnamed_addr {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD78_1:%.*]] = add nsw i32 undef, undef
-; CHECK-NEXT: [[SUB86_1:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[ADD94_1:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef
; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef
; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef>, i32 [[SUB102_1]], i32 2
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[ADD94_1]], i32 3
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef>, i32 [[SUB86_1]], i32 2
+; CHECK-NEXT: [[SUB102_4:%.*]] = sub nsw i32 undef, undef
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef>, i32 [[SUB102_3]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 [[ADD78_2]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef>, i32 [[SUB102_1]], i32 2
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[ADD78_2]], i32 5
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[SUB102_3]], i32 6
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[ADD94_1]], i32 5
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[SUB102_4]], i32 6
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef>, <8 x i32> <i32 8, i32 9, i32 3, i32 2, i32 5, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 14>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
index 5e85ecd610ebd..d7797ec26bb45 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-insert-point-for-nodes.ll
@@ -6,8 +6,8 @@ define i32 @test() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], %[[BB24:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ [[TMP17:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP13:%.*]], %[[BB24:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP14:%.*]], %[[BB24]] ], [ zeroinitializer, %[[BB]] ]
; CHECK-NEXT: br i1 false, label %[[BB4:.*]], label %[[BB11:.*]]
; CHECK: [[BB4]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ zeroinitializer, %[[BB1]] ]
@@ -16,27 +16,34 @@ define i32 @test() {
; CHECK: [[BB11]]:
; CHECK-NEXT: br i1 false, label %[[BB12:.*]], label %[[BB16:.*]]
; CHECK: [[BB12]]:
-; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP1]], <i32 poison, i32 poison, i32 0, i32 0>
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 0, [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br label %[[BB13:.*]]
; CHECK: [[BB13]]:
-; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB12]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP6]], %[[BB12]] ]
; CHECK-NEXT: br label %[[BB16]]
; CHECK: [[BB16]]:
-; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB11]] ], [ [[TMP5]], %[[BB13]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB11]] ], [ [[TMP7]], %[[BB13]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
; CHECK-NEXT: br label %[[BB19]]
; CHECK: [[BB19]]:
+; CHECK-NEXT: [[PHI20:%.*]] = phi i32 [ 0, %[[BB4]] ], [ [[TMP10]], %[[BB16]] ]
+; CHECK-NEXT: [[PHI21:%.*]] = phi i32 [ 0, %[[BB4]] ], [ [[TMP9]], %[[BB16]] ]
; CHECK-NEXT: [[PHI22:%.*]] = phi double [ 0.000000e+00, %[[BB4]] ], [ 0.000000e+00, %[[BB16]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB4]] ], [ [[TMP6]], %[[BB16]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], <i32 poison, i32 poison, i32 0, i32 0>
+; CHECK-NEXT: [[OR23:%.*]] = or i32 [[PHI21]], 0
; CHECK-NEXT: br label %[[BB24]]
; CHECK: [[BB24]]:
-; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 poison, i32 poison, i32 0, i32 0>
-; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 0, i32 -1>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
-; CHECK-NEXT: [[TMP17]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 poison, i32 poison, i32 2, i32 7>
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[PHI20]], 0
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[LSHR]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT: [[LSHR25:%.*]] = lshr i32 [[TMP11]], [[AND]]
+; CHECK-NEXT: [[OR26:%.*]] = or i32 0, [[OR23]]
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[OR26]], i32 2
+; CHECK-NEXT: [[TMP13]] = insertelement <4 x i32> [[TMP12]], i32 [[LSHR25]], i32 3
+; CHECK-NEXT: [[TMP14]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[LSHR25]], i32 1
; CHECK-NEXT: br label %[[BB1]]
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
index 0198b1c5cb846..285397fefb68a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
@@ -8,7 +8,7 @@ define i32 @test() {
; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
index 714dee4cb3a07..29c58621ad0af 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
@@ -6,7 +6,7 @@ define i16 @test(i16 %conv11) {
; CHECK-SAME: i16 [[CONV11:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[CONV11]], i32 3
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], <double 0.000000e+00, double 0.000000e+00, double 1.000000e+00, double 0.000000e+00>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
index ff1395a450841..4f9ffb77718d0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll
@@ -10,7 +10,7 @@ define i32 @foo() {
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
; CHECK-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
index c0257ac06e98f..c4ade9aad1b42 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-parent-multi-copyables.ll
@@ -6,11 +6,15 @@ define void @test() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB6:.*]]
; CHECK: [[BB1]]:
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>, i32 -1, i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 0, i32 3
-; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, [[TMP6]]
-; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, [[TMP6]]
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[SHL4:%.*]] = shl i32 0, 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[SHL4]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i32> <i32 0, i32 -1>, [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> <i32 0, i32 -1>, [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 0, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
; CHECK-NEXT: br label %[[BB6]]
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ [[TMP4]], %[[BB1]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
index ec554b4607cce..a3ab94054a401 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-extern-use.ll
@@ -7,16 +7,16 @@ define void @test(i32 %arg) {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARG]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[ARG]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[SUB]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
; CHECK-NEXT: br i1 false, label %[[BB8:.*]], label %[[BB4:.*]]
; CHECK: [[BB4]]:
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> zeroinitializer, [[TMP5]]
; CHECK-NEXT: br label %[[BB8]]
; CHECK: [[BB8]]:
-; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB4]] ], [ [[TMP1]], %[[BB1]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP6]], %[[BB4]] ], [ [[TMP3]], %[[BB1]] ]
; CHECK-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
index cd5c95d4b95ae..8830561f9fec7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-schedulable-with-multi-copyables.ll
@@ -5,13 +5,13 @@ define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i
; CHECK-LABEL: define i64 @test(
; CHECK-SAME: ptr [[ARG1:%.*]], i64 [[ALLOCA_PROMOTED344:%.*]], i8 [[LOAD_311_I:%.*]], i1 [[LOAD1_I:%.*]]) {
; CHECK-NEXT: [[BB:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[LOAD_311_I]], i32 3
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> <i8 poison, i8 poison, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i8> <i8 0, i8 0, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 [[LOAD_311_I]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> <i8 0, i8 poison, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, i8 [[LOAD_311_I]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[ALLOCA_PROMOTED344]], i32 1
; CHECK-NEXT: br label %[[BB2:.*]]
; CHECK: [[BB2]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[BB]] ], [ [[TMP28:%.*]], %[[BB12_8_I:.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP29:%.*]], %[[BB12_8_I]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi <8 x i8> [ zeroinitializer, %[[BB]] ], [ [[TMP27:%.*]], %[[BB12_8_I]] ]
; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[SPAM_EXIT:.*]], label %[[BB4_LR_PH_I:.*]]
; CHECK: [[BB4_LR_PH_I]]:
; CHECK-NEXT: br i1 true, label %[[BB3_I_I_PEEL:.*]], label %[[EGGS_EXIT_I_PEEL:.*]]
@@ -19,23 +19,26 @@ define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], splat (i64 1)
; CHECK-NEXT: [[LOAD4_I_I_PEEL:%.*]] = load i64, ptr [[ARG1]], align 8
; CHECK-NEXT: [[SHL_I_I_PEEL:%.*]] = shl i64 [[LOAD4_I_I_PEEL]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 poison, i32 0>
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[SHL_I_I_PEEL]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP22:%.*]] = or <2 x i64> [[TMP5]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> [[TMP22]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = trunc <2 x i64> [[TMP10]] to <2 x i8>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i8> [[TMP11]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 poison>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <4 x i32> <i32 poison, i32 1, i32 2, i32 2>
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
; CHECK-NEXT: br label %[[EGGS_EXIT_I_PEEL]]
; CHECK: [[EGGS_EXIT_I_PEEL]]:
-; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i64> [ [[TMP10]], %[[BB3_I_I_PEEL]] ], [ zeroinitializer, %[[BB4_LR_PH_I]] ]
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP11]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i8>
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP12]], i32 1
+; CHECK-NEXT: [[LOAD5_I_I93_PEEL:%.*]] = phi i64 [ [[TMP15]], %[[BB3_I_I_PEEL]] ], [ 0, %[[BB4_LR_PH_I]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i8> [ [[TMP14]], %[[BB3_I_I_PEEL]] ], [ <i8 poison, i8 0, i8 0, i8 0>, %[[BB4_LR_PH_I]] ]
; CHECK-NEXT: br label %[[SPAM_EXIT]]
; CHECK: [[SPAM_EXIT]]:
-; CHECK-NEXT: [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[TMP14]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ]
+; CHECK-NEXT: [[GETELEMENTPTR_I_I_PROMOTED346:%.*]] = phi i64 [ [[LOAD5_I_I93_PEEL]], %[[EGGS_EXIT_I_PEEL]] ], [ 0, %[[BB2]] ]
; CHECK-NEXT: [[LOAD_8_I:%.*]] = phi i8 [ 0, %[[EGGS_EXIT_I_PEEL]] ], [ 1, %[[BB2]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP13]], %[[EGGS_EXIT_I_PEEL]] ], [ zeroinitializer, %[[BB2]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i8> [ [[TMP16]], %[[EGGS_EXIT_I_PEEL]] ], [ <i8 poison, i8 0, i8 0, i8 0>, %[[BB2]] ]
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 1, i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: br i1 [[LOAD1_I]], label %[[BB12_8_I]], label %[[BB12_1_THREAD_I:.*]]
; CHECK: [[BB12_1_THREAD_I]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i8> [[TMP4]], i32 0
@@ -44,38 +47,35 @@ define i64 @test(ptr %arg1, i64 %alloca.promoted344, i8 %load.311.i, i1 %load1.i
; CHECK: [[BB8_3_I]]:
; CHECK-NEXT: br label %[[BB12_3_I]]
; CHECK: [[BB12_3_I]]:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i8> [[TMP4]], i32 7
; CHECK-NEXT: [[ICMP5_7_I:%.*]] = icmp eq i8 [[TMP20]], 0
; CHECK-NEXT: br i1 [[ICMP5_7_I]], label %[[BB12_4_I:.*]], label %[[BB8_4_I:.*]]
; CHECK: [[BB8_4_I]]:
; CHECK-NEXT: br label %[[BB12_4_I]]
; CHECK: [[BB12_4_I]]:
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 2
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i8> [[TMP4]], i32 5
; CHECK-NEXT: [[ICMP5_5_I:%.*]] = icmp eq i8 [[TMP19]], 0
; CHECK-NEXT: br i1 [[ICMP5_5_I]], label %[[BB12_5_I:.*]], label %[[BB8_5_I:.*]]
; CHECK: [[BB8_5_I]]:
; CHECK-NEXT: br label %[[BB12_5_I]]
; CHECK: [[BB12_5_I]]:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i8> [[TMP4]], i32 1
; CHECK-NEXT: [[ICMP5_8_I:%.*]] = icmp eq i8 [[TMP21]], 0
; CHECK-NEXT: br i1 [[ICMP5_8_I]], label %[[BB12_7_I:.*]], label %[[BB8_7_I:.*]]
; CHECK: [[BB8_7_I]]:
; CHECK-NEXT: br label %[[BB12_7_I]]
; CHECK: [[BB12_7_I]]:
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i8> [[TMP4]], i32 4
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i8> [[TMP4]], i32 3
; CHECK-NEXT: [[ICMP5_8_I1:%.*]] = icmp eq i8 [[TMP30]], 0
; CHECK-NEXT: br i1 [[ICMP5_8_I1]], label %[[BB12_8_I]], label %[[BB8_8_I:.*]]
; CHECK: [[BB8_8_I]]:
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 1
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i8> poison, i8 [[LOAD_8_I]], i32 0
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <4 x i32> <i32 poison, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[LOAD_8_I]], i32 3
+; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP24]], <8 x i8> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP25]], <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 6, i32 7>
; CHECK-NEXT: br label %[[BB12_8_I]]
; CHECK: [[BB12_8_I]]:
-; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP22]], %[[BB8_8_I]] ], [ [[TMP15]], %[[SPAM_EXIT]] ]
-; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ zeroinitializer, %[[BB12_7_I]] ], [ [[TMP25]], %[[BB8_8_I]] ], [ [[TMP16]], %[[SPAM_EXIT]] ]
-; CHECK-NEXT: [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 [[GETELEMENTPTR_I_I_PROMOTED346]], i32 1
-; CHECK-NEXT: [[TMP29]] = shufflevector <4 x i8> [[TMP26]], <4 x i8> [[TMP27]], <8 x i32> <i32 2, i32 7, i32 5, i32 0, i32 1, i32 3, i32 4, i32 6>
+; CHECK-NEXT: [[TMP27]] = phi <8 x i8> [ [[TMP0]], %[[BB12_7_I]] ], [ [[TMP26]], %[[BB8_8_I]] ], [ [[TMP18]], %[[SPAM_EXIT]] ]
+; CHECK-NEXT: [[TMP28]] = insertelement <2 x i64> [[TMP2]], i64 [[GETELEMENTPTR_I_I_PROMOTED346]], i32 0
; CHECK-NEXT: br label %[[BB2]]
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index ad8e905a8ca02..49d39d6408149 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -330,7 +330,7 @@ define void @only_arcp(ptr %x) {
define void @addsub_all_nsw(ptr %x) {
; CHECK-LABEL: @addsub_all_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
+; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 -1, i32 1, i32 -1, i32 1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
@@ -359,7 +359,7 @@ define void @addsub_all_nsw(ptr %x) {
define void @addsub_some_nsw(ptr %x) {
; CHECK-LABEL: @addsub_some_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
+; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[TMP2]], <i32 -1, i32 1, i32 -1, i32 1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
@@ -388,7 +388,7 @@ define void @addsub_some_nsw(ptr %x) {
define void @addsub_no_nsw(ptr %x) {
; CHECK-LABEL: @addsub_no_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 -1, i32 1, i32 -1>
+; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[TMP2]], <i32 -1, i32 1, i32 -1, i32 1>
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[X]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 20a42777cf8e4..193e375516673 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -10,28 +10,12 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]]
; CHECK: if.then22.i:
; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1
-; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]]
-; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SUB_I]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], <i32 2, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]], <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP19]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
-; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i8> [[TMP15]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 -1, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i32> [[TMP3]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP4]] to <16 x i8>
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1)
; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
index 2dd6b395597c3..e88022db95298 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reschedule-only-scheduled.ll
@@ -4,17 +4,18 @@
define i16 @test() {
; CHECK-LABEL: define i16 @test() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 0, 0
-; CHECK-NEXT: [[TMP10:%.*]] = shl i32 [[TMP0]], 0
; CHECK-NEXT: [[CALL99_I:%.*]] = call i32 @llvm.bswap.i32(i32 0)
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[CALL99_I]], 0
; CHECK-NEXT: [[CALL7_I45:%.*]] = tail call i32 null(i32 0)
-; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[CALL7_I45]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <28 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison>, i32 [[TMP10]], i32 4
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <28 x i32> [[TMP4]], i32 [[TMP2]], i32 5
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <28 x i32> [[TMP5]], <28 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 28, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 poison>
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <28 x i32> [[TMP6]], i32 [[TMP8]], i32 12
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <28 x i32> [[TMP7]], <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 poison>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 0>, i32 [[CALL99_I]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[CALL7_I45]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 0>, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 2, i32 3, i32 14, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = shl <8 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = shl i32 0, 0
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <28 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison>, <28 x i32> [[TMP10]], <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 36, i32 37, i32 38, i32 39, i32 poison, i32 poison, i32 poison, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 poison>
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <28 x i32> [[TMP18]], i32 [[TMP6]], i32 16
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <28 x i32> [[TMP16]], <28 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 28, i32 29, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <28 x i32> [[TMP9]], <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 28, i32 29, i32 30, i32 31, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
; CHECK-NEXT: [[TMP11:%.*]] = and <28 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, [[TMP17]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll
index 6dc9806da0aa9..830c7755fc53a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll
@@ -4,21 +4,20 @@
define float @test() {
; CHECK-LABEL: define float @test() {
; CHECK-NEXT: [[LABEL:.*]]:
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float 0.000000e+00, i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef, float undef>, <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[TMP7]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
-; CHECK-NEXT: [[TMP21:%.*]] = fsub <8 x float> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP21]], <8 x float> poison, <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <20 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, <20 x float> [[TMP24]], <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <12 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float 0.000000e+00, i32 5
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float undef>, float 0.000000e+00, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <12 x float> [[TMP1]], float 0.000000e+00, i32 8
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <12 x float> [[TMP0]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <12 x float> <float 0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float poison>, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fsub <8 x float> zeroinitializer, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <12 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <12 x float> [[TMP6]], <12 x float> poison, <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <20 x float> [[TMP7]], <20 x float> [[TMP8]], <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
; CHECK-NEXT: br label %[[REGION_30:.*]]
; CHECK: [[REGION_30]]:
-; CHECK-NEXT: [[TMP26:%.*]] = phi <20 x float> [ [[TMP10]], %[[LABEL]] ]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <20 x float> [[TMP26]], i32 7
+; CHECK-NEXT: [[TMP10:%.*]] = phi <20 x float> [ [[TMP9]], %[[LABEL]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <20 x float> [[TMP10]], i32 10
; CHECK-NEXT: ret float [[TMP27]]
;
label:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shl-compatible-with-add.ll b/llvm/test/Transforms/SLPVectorizer/X86/shl-compatible-with-add.ll
index 04a45e4d416f8..43c5862967bb9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shl-compatible-with-add.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shl-compatible-with-add.ll
@@ -14,18 +14,16 @@ define void @intrapred_luma(ptr %0, i16 %1, i32 %conv593) {
; CHECK-NEXT: [[ARRAYIDX590:%.*]] = getelementptr i8, ptr [[DOTPRE]], i64 4304
; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i32> [[TMP4]], <i32 1, i32 0>
; CHECK-NEXT: [[CONV635:%.*]] = zext i16 [[TMP1]] to i32
-; CHECK-NEXT: [[ADD633:%.*]] = add i32 [[CONV635]], 1
-; CHECK-NEXT: [[ADD636:%.*]] = add i32 [[ADD633]], [[CONV593]]
+; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 2, [[CONV593]]
+; CHECK-NEXT: [[ADD636:%.*]] = add i32 [[OP_RDX]], [[CONV635]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[ADD596]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[ADD636]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>, i32 [[CONV593]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, i32 [[CONV593]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> [[TMP9]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP9]], [[TMP11]]
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], <i32 1, i32 2, i32 1, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP12]], <i32 1, i32 2, i32 1, i32 1>
; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16>
; CHECK-NEXT: store <4 x i16> [[TMP16]], ptr [[ARRAYIDX590]], align 2
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation.ll b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation.ll
index 1cba1bb586e36..509a98a7843d9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation.ll
@@ -5,38 +5,26 @@ define void @test(ptr %src, i8 %0, i32 %conv2) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[SRC:%.*]], i8 [[TMP0:%.*]], i32 [[CONV2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV65:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT: [[ADD36:%.*]] = add i32 [[CONV65]], 1
-; CHECK-NEXT: [[ADD37:%.*]] = or i32 [[ADD36]], [[CONV2]]
-; CHECK-NEXT: [[CONV4:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT: [[ADD38:%.*]] = or i32 [[ADD37]], [[CONV4]]
-; CHECK-NEXT: [[SHR39:%.*]] = lshr i32 [[ADD38]], 1
-; CHECK-NEXT: [[CONV40:%.*]] = trunc i32 [[SHR39]] to i8
; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr i8, ptr [[SRC]], i64 1
-; CHECK-NEXT: store i8 [[CONV40]], ptr [[ARRAYIDX41]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV4]], 1
-; CHECK-NEXT: [[ADD45:%.*]] = or i32 [[ADD]], [[CONV2]]
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SRC]], align 1
-; CHECK-NEXT: [[CONV8:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[ADD46:%.*]] = or i32 [[ADD45]], [[CONV8]]
-; CHECK-NEXT: [[SHR47:%.*]] = lshr i32 [[ADD46]], 1
-; CHECK-NEXT: [[CONV48:%.*]] = trunc i32 [[SHR47]] to i8
-; CHECK-NEXT: [[ARRAYIDX49:%.*]] = getelementptr i8, ptr [[SRC]], i64 2
-; CHECK-NEXT: store i8 [[CONV48]], ptr [[ARRAYIDX49]], align 1
-; CHECK-NEXT: [[MUL52:%.*]] = shl i32 [[CONV8]], 1
-; CHECK-NEXT: [[ADD54:%.*]] = or i32 [[MUL52]], 1
-; CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT: [[ADD55:%.*]] = add i32 [[ADD54]], [[CONV10]]
-; CHECK-NEXT: [[SHR56:%.*]] = lshr i32 [[ADD55]], 1
-; CHECK-NEXT: [[CONV57:%.*]] = trunc i32 [[SHR56]] to i8
-; CHECK-NEXT: [[ARRAYIDX58:%.*]] = getelementptr i8, ptr [[SRC]], i64 3
-; CHECK-NEXT: store i8 [[CONV57]], ptr [[ARRAYIDX58]], align 1
-; CHECK-NEXT: [[ADD63:%.*]] = add i32 [[CONV8]], 1
-; CHECK-NEXT: [[ADD64:%.*]] = or i32 [[ADD63]], [[CONV10]]
-; CHECK-NEXT: [[SHR66:%.*]] = lshr i32 [[ADD64]], 1
-; CHECK-NEXT: [[CONV67:%.*]] = trunc i32 [[SHR66]] to i8
-; CHECK-NEXT: [[ARRAYIDX68:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
-; CHECK-NEXT: store i8 [[CONV67]], ptr [[ARRAYIDX68]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
+; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[TMP6]] to <4 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1)
+; CHECK-NEXT: [[TMP9:%.*]] = shl <4 x i32> [[TMP5]], splat (i32 1)
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 1, i32 0>, i32 [[CONV2]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP10]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP13]], [[TMP7]]
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT: [[TMP17:%.*]] = lshr <4 x i32> [[TMP16]], splat (i32 1)
+; CHECK-NEXT: [[TMP18:%.*]] = trunc <4 x i32> [[TMP17]] to <4 x i8>
+; CHECK-NEXT: store <4 x i8> [[TMP18]], ptr [[ARRAYIDX41]], align 1
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
index 8f29f3f8de460..a25e340a75ab4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
@@ -13,7 +13,9 @@ define i32 @test(i32 %0, i32 %1) {
; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i32> [[TMP3]], [[TMP12]]
; CHECK-NEXT: [[TMP21:%.*]] = shl <2 x i32> [[TMP3]], [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP21]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[DOTNEG_NEG:%.*]] = shl i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = shl <2 x i32> [[TMP3]], splat (i32 1)
+; CHECK-NEXT: [[TMP11:%.*]] = sub <2 x i32> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 32), align 16
; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @st, i64 12), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr getelementptr inbounds nuw (i8, ptr @st, i64 8), align 8
@@ -21,16 +23,15 @@ define i32 @test(i32 %0, i32 %1) {
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP8]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: store <2 x i32> [[TMP9]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 8), align 8
-; CHECK-NEXT: [[SUB120_3:%.*]] = or i32 [[TMP5]], [[DOTNEG_NEG]]
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> <i32 1, i32 poison, i32 1, i32 1>, i32 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = shl <4 x i32> [[TMP10]], <i32 0, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP14:%.*]] = sub <2 x i32> zeroinitializer, [[TMP13]]
-; CHECK-NEXT: store <2 x i32> [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 32), align 16
-; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, <4 x i32> <i32 1, i32 5, i32 1, i32 poison>
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[SUB120_3]], i32 3
-; CHECK-NEXT: [[TMP17:%.*]] = shl <4 x i32> [[TMP16]], [[TMP11]]
-; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], [[TMP11]]
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP5]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i32> [[TMP22]], [[TMP10]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <2 x i32> [[TMP22]], [[TMP10]]
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP23]], <2 x i32> [[TMP18]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x i32> [[TMP24]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> [[TMP26]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
+; CHECK-NEXT: [[TMP19:%.*]] = shl <4 x i32> [[TMP17]], <i32 1, i32 0, i32 1, i32 1>
; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 16), align 16
; CHECK-NEXT: ret i32 0
;
@@ -120,28 +121,25 @@ define i32 @test2(i32 %0) {
; CHECK-LABEL: define i32 @test2(
; CHECK-SAME: i32 [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[TMP0]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> splat (i32 1), [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], <i32 1, i32 0>
-; CHECK-NEXT: [[TMP10:%.*]] = shl <2 x i32> [[TMP9]], splat (i32 1)
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP8]], splat (i32 1)
+; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP8]], <i32 1, i32 0>
+; CHECK-NEXT: [[TMP10:%.*]] = shl <2 x i32> [[TMP14]], splat (i32 1)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
; CHECK-NEXT: store i32 [[TMP11]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 20), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP0]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP0]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[ADD110_3]], i32 3
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[ADD110_3]], i32 3
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP15]], [[TMP17]]
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP12]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP12]], [[TMP17]]
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP19]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: store <4 x i32> [[TMP20]], ptr @st, align 4
; CHECK-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
index 39c7602c95828..2d4797afafa2a 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -177,10 +177,8 @@ define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], <i8 0, i8 5>
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], <i8 123, i8 1>
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i8> [[TMP3]], <i8 123, i8 1>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> [[TMP5]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], <i8 123, i8 5>
+; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP3]], <i8 0, i8 1>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%add = xor i8 %inp, 5
diff --git a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
index 201643d981f35..69ac94e6dbe5e 100644
--- a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll
@@ -5,22 +5,22 @@
define i1 @test(float %0, double %1) {
; X86-LABEL: define i1 @test
; X86-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
-; X86-NEXT: [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
-; X86-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
-; X86-NEXT: [[TMP5:%.*]] = insertelement <8 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 1.000000e+00, double 1.000000e+00>, double [[TMP1]], i32 4
-; X86-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; X86-NEXT: [[TMP8:%.*]] = fmul <8 x double> zeroinitializer, [[TMP7]]
-; X86-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 13, i32 13>
-; X86-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; X86-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 poison>
-; X86-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; X86-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]]
-; X86-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x double> [[TMP14]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; X86-NEXT: [[TMP16:%.*]] = fsub <8 x double> [[TMP15]], [[TMP8]]
-; X86-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP15]], [[TMP8]]
-; X86-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
+; X86-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; X86-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
+; X86-NEXT: [[TMP5:%.*]] = fmul double 0.000000e+00, 0.000000e+00
+; X86-NEXT: [[TMP6:%.*]] = insertelement <8 x double> <double 0.000000e+00, double undef, double 0.000000e+00, double undef, double undef, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, double [[TMP1]], i32 4
+; X86-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; X86-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 5, i32 6, i32 7>
+; X86-NEXT: [[TMP9:%.*]] = fmul <8 x double> zeroinitializer, [[TMP8]]
+; X86-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> <double 0.000000e+00, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 8, i32 4, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
+; X86-NEXT: [[TMP11:%.*]] = insertelement <8 x double> poison, double [[TMP5]], i32 0
+; X86-NEXT: [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
+; X86-NEXT: [[TMP13:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; X86-NEXT: [[TMP14:%.*]] = shufflevector <8 x double> [[TMP8]], <8 x double> [[TMP13]], <8 x i32> <i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 10, i32 poison, i32 poison>
+; X86-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> [[TMP14]], <8 x double> <double poison, double poison, double poison, double 0.000000e+00, double 1.000000e+00, double poison, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 poison, i32 poison, i32 2, i32 11, i32 12, i32 5, i32 14, i32 15>
+; X86-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef, double undef, double undef, double undef, double undef>, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; X86-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP13]], [[TMP16]]
+; X86-NEXT: [[TMP18:%.*]] = fsub <8 x double> [[TMP17]], [[TMP9]]
; X86-NEXT: [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float>
; X86-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer
; X86-NEXT: [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer
@@ -30,29 +30,31 @@ define i1 @test(float %0, double %1) {
;
; AARCH64-LABEL: define i1 @test
; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
-; AARCH64-NEXT: [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
-; AARCH64-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
-; AARCH64-NEXT: [[TMP5:%.*]] = insertelement <8 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 1.000000e+00, double 1.000000e+00>, double [[TMP1]], i32 4
-; AARCH64-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AARCH64-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AARCH64-NEXT: [[TMP8:%.*]] = fmul <8 x double> zeroinitializer, [[TMP7]]
-; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 13, i32 13>
-; AARCH64-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; AARCH64-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <4 x i32> <i32 2, i32 0, i32 poison, i32 poison>
-; AARCH64-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
-; AARCH64-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
-; AARCH64-NEXT: [[TMP14:%.*]] = fmul <4 x double> [[TMP10]], [[TMP13]]
-; AARCH64-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AARCH64-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x double> [[TMP15]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; AARCH64-NEXT: [[TMP17:%.*]] = fsub <8 x double> [[TMP16]], [[TMP8]]
-; AARCH64-NEXT: [[TMP18:%.*]] = fmul <8 x double> [[TMP16]], [[TMP8]]
-; AARCH64-NEXT: [[TMP19:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
-; AARCH64-NEXT: [[TMP20:%.*]] = fptrunc <8 x double> [[TMP19]] to <8 x float>
-; AARCH64-NEXT: [[TMP21:%.*]] = fmul <8 x float> [[TMP20]], zeroinitializer
-; AARCH64-NEXT: [[TMP22:%.*]] = fcmp oeq <8 x float> [[TMP21]], zeroinitializer
-; AARCH64-NEXT: [[TMP23:%.*]] = freeze <8 x i1> [[TMP22]]
-; AARCH64-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP23]])
-; AARCH64-NEXT: ret i1 [[TMP24]]
+; AARCH64-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; AARCH64-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
+; AARCH64-NEXT: [[TMP5:%.*]] = fmul double 0.000000e+00, 0.000000e+00
+; AARCH64-NEXT: [[TMP6:%.*]] = insertelement <8 x double> <double undef, double undef, double 0.000000e+00, double undef, double undef, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, double [[TMP1]], i32 4
+; AARCH64-NEXT: [[TMP7:%.*]] = insertelement <8 x double> [[TMP6]], double 0.000000e+00, i32 0
+; AARCH64-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 5, i32 6, i32 7>
+; AARCH64-NEXT: [[TMP10:%.*]] = fmul <8 x double> zeroinitializer, [[TMP9]]
+; AARCH64-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> <double 0.000000e+00, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 8, i32 4, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
+; AARCH64-NEXT: [[TMP12:%.*]] = insertelement <8 x double> poison, double [[TMP5]], i32 0
+; AARCH64-NEXT: [[TMP13:%.*]] = shufflevector <8 x double> [[TMP12]], <8 x double> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
+; AARCH64-NEXT: [[TMP14:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+; AARCH64-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> [[TMP14]], <8 x double> <double poison, double poison, double poison, double 0.000000e+00, double 1.000000e+00, double poison, double 0.000000e+00, double 0.000000e+00>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 11, i32 12, i32 2, i32 14, i32 15>
+; AARCH64-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AARCH64-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AARCH64-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP17]], <8 x i32> <i32 poison, i32 poison, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AARCH64-NEXT: [[TMP19:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef, double undef, double undef, double undef, double undef>, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AARCH64-NEXT: [[TMP20:%.*]] = fmul <8 x double> [[TMP14]], [[TMP19]]
+; AARCH64-NEXT: [[TMP21:%.*]] = fsub <8 x double> [[TMP20]], [[TMP10]]
+; AARCH64-NEXT: [[TMP22:%.*]] = fptrunc <8 x double> [[TMP21]] to <8 x float>
+; AARCH64-NEXT: [[TMP23:%.*]] = fmul <8 x float> [[TMP22]], zeroinitializer
+; AARCH64-NEXT: [[TMP24:%.*]] = fcmp oeq <8 x float> [[TMP23]], zeroinitializer
+; AARCH64-NEXT: [[TMP25:%.*]] = freeze <8 x i1> [[TMP24]]
+; AARCH64-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP25]])
+; AARCH64-NEXT: ret i1 [[TMP26]]
;
%3 = fpext float %0 to double
%4 = fpext float 0.000000e+00 to double
diff --git a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
index 7ed1edc278806..5480b9867b32e 100644
--- a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
+++ b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
@@ -11,10 +11,8 @@ define i64 @src(i32 %a) {
; X86-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; X86-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; X86-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], splat (i64 4294967297)
-; X86-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP4]], splat (i64 1)
-; X86-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; X86-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; X86-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP5]], <i64 1, i64 1, i64 1, i64 1, i64 -1, i64 -1, i64 -1, i64 -1>
; X86-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP8]])
; X86-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP9]], 4294967297
; X86-NEXT: [[OP_RDX1:%.*]] = add i64 [[OP_RDX]], [[TMP0]]
@@ -28,10 +26,8 @@ define i64 @src(i32 %a) {
; AARCH64-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; AARCH64-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AARCH64-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], splat (i64 4294967297)
-; AARCH64-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP4]], splat (i64 1)
-; AARCH64-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AARCH64-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AARCH64-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AARCH64-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; AARCH64-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP5]], <i64 1, i64 1, i64 1, i64 1, i64 -1, i64 -1, i64 -1, i64 -1>
; AARCH64-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP8]])
; AARCH64-NEXT: [[OP_RDX:%.*]] = add i64 [[TMP9]], 4294967297
; AARCH64-NEXT: [[OP_RDX1:%.*]] = add i64 [[OP_RDX]], [[TMP0]]
diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll
index c90e76c6d00f7..47fc5e73f373f 100644
--- a/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reduction-modified-values.ll
@@ -6,10 +6,8 @@ define i32 @test() {
; X86-LABEL: @test(
; X86-NEXT: bb:
; X86-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; X86-NEXT: [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
-; X86-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; X86-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; X86-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
; X86-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; X86-NEXT: ret i32 [[TMP5]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
index c809a6ee1fe98..69b275c796ccd 100644
--- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll
@@ -5,9 +5,8 @@ define void @func(i32 %0) {
; CHECK-LABEL: define void @func(
; CHECK-SAME: i32 [[TMP0:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0
@@ -56,6 +55,7 @@ define void @func(i32 %0) {
; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP20]], i32 14
; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP55]]
+; CHECK-NEXT: [[TMP80:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP20]], i32 13
; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP57]]
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP20]], i32 12
More information about the llvm-commits mailing list