[llvm] e0e5000 - [SLP]Remove Alternate early profitability checks in favor of throttling
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 06:37:55 PDT 2026
Author: Alexey Bataev
Date: 2026-03-08T09:37:51-04:00
New Revision: e0e5000ea70b115a69ca8802003ac9be56907d4f
URL: https://github.com/llvm/llvm-project/commit/e0e5000ea70b115a69ca8802003ac9be56907d4f
DIFF: https://github.com/llvm/llvm-project/commit/e0e5000ea70b115a69ca8802003ac9be56907d4f.diff
LOG: [SLP]Remove Alternate early profitability checks in favor of throttling
Removes early check, which may prevent some further optimizations, in
favor of tree throttling.
Reviewers: RKSimon, hiraditya
Pull Request: https://github.com/llvm/llvm-project/pull/182760
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll
llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 591ee2fea3148..2e33a6a5d3303 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -129,11 +129,6 @@ static cl::opt<int>
cl::desc("Only vectorize if you gain more than this "
"number "));
-static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
- "slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
- cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
- "heuristics and makes vectorization decision via cost modeling."));
-
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
@@ -4608,15 +4603,6 @@ class slpvectorizer::BoUpSLP {
return nullptr;
}
- /// Check that the operand node of alternate node does not generate
- /// buildvector sequence. If it is, then probably not worth it to build
- /// alternate shuffle, if number of buildvector operands + alternate
- /// instruction > than the number of buildvector instructions.
- /// \param S the instructions state of the analyzed values.
- /// \param VL list of the instructions with alternate opcodes.
- bool areAltOperandsProfitable(const InstructionsState &S,
- ArrayRef<Value *> VL) const;
-
/// Contains all the outputs of legality analysis for a list of values to
/// vectorize.
class ScalarsVectorizationLegality {
@@ -10244,120 +10230,6 @@ static std::pair<size_t, size_t> generateKeySubkey(
static bool isMainInstruction(Instruction *I, Instruction *MainOp,
Instruction *AltOp, const TargetLibraryInfo &TLI);
-bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
- ArrayRef<Value *> VL) const {
- Type *ScalarTy = S.getMainOp()->getType();
- unsigned Opcode0 = S.getOpcode();
- unsigned Opcode1 = S.getAltOpcode();
- SmallBitVector OpcodeMask(getAltInstrMask(VL, ScalarTy, Opcode0, Opcode1));
- // If this pattern is supported by the target then consider it profitable.
- if (TTI->isLegalAltInstr(getWidenedType(ScalarTy, VL.size()), Opcode0,
- Opcode1, OpcodeMask))
- return true;
- SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands())) {
- Operands.emplace_back();
- // Prepare the operand vector.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Operands.back().push_back(
- PoisonValue::get(S.getMainOp()->getOperand(I)->getType()));
- continue;
- }
- Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
- }
- }
- if (Operands.size() == 2) {
- // Try find best operands candidates.
- for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
- SmallVector<std::pair<Value *, Value *>> Candidates(3);
- Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
- Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
- Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
- std::optional<int> Res = findBestRootPair(Candidates);
- switch (Res.value_or(0)) {
- case 0:
- break;
- case 1:
- std::swap(Operands[0][I + 1], Operands[1][I + 1]);
- break;
- case 2:
- std::swap(Operands[0][I], Operands[1][I]);
- break;
- default:
- llvm_unreachable("Unexpected index.");
- }
- }
- }
- DenseSet<unsigned> UniqueOpcodes;
- constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
- unsigned NonInstCnt = 0;
- // Estimate number of instructions, required for the vectorized node and for
- // the buildvector node.
- unsigned UndefCnt = 0;
- // Count the number of extra shuffles, required for vector nodes.
- unsigned ExtraShuffleInsts = 0;
- // Check that operands do not contain same values and create either perfect
- // diamond match or shuffled match.
- if (Operands.size() == 2) {
- // Do not count same operands twice.
- if (Operands.front() == Operands.back()) {
- Operands.erase(Operands.begin());
- } else if (!allConstant(Operands.front()) &&
- all_of(Operands.front(), [&](Value *V) {
- return is_contained(Operands.back(), V);
- })) {
- Operands.erase(Operands.begin());
- ++ExtraShuffleInsts;
- }
- }
- const Loop *L = LI->getLoopFor(S.getMainOp()->getParent());
- // Vectorize node, if:
- // 1. at least single operand is constant or splat.
- // 2. Operands have many loop invariants (the instructions are not loop
- // invariants).
- // 3. At least single unique operands is supposed to vectorized.
- return none_of(Operands,
- [&](ArrayRef<Value *> Op) {
- if (allConstant(Op) ||
- (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
- getSameOpcode(Op, *TLI)))
- return false;
- DenseMap<Value *, unsigned> Uniques;
- for (Value *V : Op) {
- if (isa<Constant, ExtractElementInst>(V) ||
- isVectorized(V) || (L && L->isLoopInvariant(V))) {
- if (isa<UndefValue>(V))
- ++UndefCnt;
- continue;
- }
- auto Res = Uniques.try_emplace(V, 0);
- // Found first duplicate - need to add shuffle.
- if (!Res.second && Res.first->second == 1)
- ++ExtraShuffleInsts;
- ++Res.first->getSecond();
- if (auto *I = dyn_cast<Instruction>(V))
- UniqueOpcodes.insert(I->getOpcode());
- else if (Res.second)
- ++NonInstCnt;
- }
- return none_of(Uniques, [&](const auto &P) {
- return P.first->hasNUsesOrMore(P.second + 1) &&
- none_of(P.first->users(), [&](User *U) {
- return isVectorized(U) || Uniques.contains(U);
- });
- });
- }) ||
- // Do not vectorize node, if estimated number of vector instructions is
- // more than estimated number of buildvector instructions. Number of
- // vector operands is number of vector instructions + number of vector
- // instructions for operands (buildvectors). Number of buildvector
- // instructions is just number_of_operands * number_of_scalars.
- (UndefCnt < (VL.size() - 1) * S.getMainOp()->getNumOperands() &&
- (UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
- NumAltInsts) < S.getMainOp()->getNumOperands() * VL.size());
-}
-
/// Builds the arguments types vector for the given call instruction with the
/// given \p ID for the specified vector factor.
static SmallVector<Type *>
@@ -10827,13 +10699,6 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
- if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
- LLVM_DEBUG(
- dbgs()
- << "SLP: ShuffleVector not vectorized, operands are buildvector and "
- "the whole alt sequence is not profitable.\n");
- return TreeEntry::NeedToGather;
- }
return TreeEntry::Vectorize;
}
@@ -17274,6 +17139,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
auto It = MinBWs.find(TE);
if (It != MinBWs.end())
ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
+ if (isa<CmpInst>(TE->Scalars.front()))
+ ScalarTy = TE->Scalars.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
const unsigned EntryVF = TE->getVectorFactor();
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
@@ -17302,7 +17169,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
// If all scalars are reused in gather node(s) or other vector nodes, there
// might be extra cost for inserting them.
- if (all_of(TE->Scalars, [&](Value *V) {
+ if ((!TE->hasState() || !TE->isAltShuffle()) &&
+ all_of(TE->Scalars, [&](Value *V) {
return (TE->hasCopyableElements() && TE->isCopyableElement(V)) ||
isConstant(V) || isGathered(V) || getTreeEntries(V).size() > 1;
}))
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
index 38705032ce1c9..77a1c812c52a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
@@ -34,6 +34,13 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <64 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 1, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison>, i64 [[TMP1]], i32 11
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> poison, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <14 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i64> [[TMP45]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <2 x i64> [[TMP85]], <2 x i64> <i64 poison, i64 1>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP87:%.*]] = mul <2 x i64> [[TMP85]], [[TMP86]]
+; CHECK-NEXT: [[TMP88:%.*]] = or <2 x i64> [[TMP85]], [[TMP86]]
+; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x i64> [[TMP87]], <2 x i64> [[TMP88]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <2 x i64> [[TMP89]], <2 x i64> poison, <64 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[DOTLR_PH1977_US:.*]]
; CHECK: [[_LR_PH1977_US:.*:]]
; CHECK-NEXT: [[INDVAR37888:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER:%.*]] ], [ 1, %[[DOTLR_PH1977_US]] ]
@@ -41,15 +48,12 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP31]]
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP0]], [[TMP0]]
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP40:%.*]] = or <2 x i64> [[TMP42]], splat (i64 1)
; CHECK-NEXT: [[TMP41:%.*]] = shl <2 x i64> [[TMP42]], splat (i64 1)
; CHECK-NEXT: [[TMP39:%.*]] = mul i64 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[TMP43:%.*]] = add <8 x i64> [[TMP35]], [[TMP25]]
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <64 x i64> [[TMP80]], i64 [[INDVAR37888]], i32 1
-; CHECK-NEXT: [[TMP45:%.*]] = insertelement <64 x i64> [[TMP44]], i64 [[TMP27]], i32 2
-; CHECK-NEXT: [[TMP46:%.*]] = insertelement <64 x i64> [[TMP45]], i64 [[TMP30]], i32 3
+; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <64 x i64> [[TMP44]], <64 x i64> [[TMP90]], <64 x i32> <i32 poison, i32 1, i32 64, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> poison, <64 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <64 x i64> [[TMP46]], <64 x i64> [[TMP48]], <64 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 66, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
index fc805b226d3b7..06c4bc205adf0 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
@@ -59,12 +59,20 @@ define i32 @test1(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 [[D_0]], i32 0
+; CHECK-NEXT: [[SZERO_2:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT: [[UZERO_1:%.*]] = zext i16 -1 to i32
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[UZERO_1]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SZERO_2]], i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> <i32 -1, i32 -16383, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], <i32 -1, i32 -16383, i32 65535, i32 -1>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -16383, i32 65535, i32 -1>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], <i32 65535, i32 -16383, i32 65535, i32 65535>
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> <i32 4, i32 3, i32 2, i32 1>
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP10]], <4 x i16> <i16 4, i16 3, i16 2, i16 1>
+; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: ret i32 [[TMP8]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index f2ea2df7cc982..78fc3a60f0514 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -1,31 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s
-; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @test(i16 %0) {
-; FORCED-LABEL: @test(
-; FORCED-NEXT: for.body92.preheader:
-; FORCED-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
-; FORCED-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
-; FORCED-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
-; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
-; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-; FORCED-NEXT: br label [[FOR_BODY92:%.*]]
-; FORCED: for.body92:
-; FORCED-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
-; FORCED-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8
-; FORCED-NEXT: br label [[FOR_BODY92]]
-;
; CHECK-LABEL: @test(
; CHECK-NEXT: for.body92.preheader:
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
; CHECK-NEXT: br label [[FOR_BODY92:%.*]]
; CHECK: for.body92:
-; CHECK-NEXT: [[CONV177_I:%.*]] = sext i16 0 to i32
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0:%.*]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, i32 [[CONV177_I]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 2
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]]
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8
; CHECK-NEXT: br label [[FOR_BODY92]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
index 5a9ea0d292fa0..7ce45b872fae4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll
@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s
-; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define i64 @foo() {
; CHECK-LABEL: define i64 @foo() {
@@ -23,25 +21,6 @@ define i64 @foo() {
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
;
-; FORCED-LABEL: define i64 @foo() {
-; FORCED-NEXT: bb:
-; FORCED-NEXT: [[ADD7:%.*]] = add i64 0, 0
-; FORCED-NEXT: br label [[BB3:%.*]]
-; FORCED: bb1:
-; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
-; FORCED-NEXT: ret i64 0
-; FORCED: bb3:
-; FORCED-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
-; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
-; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
-; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
-; FORCED-NEXT: [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
-; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
-; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
-; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
-; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
-;
bb:
br label %bb3
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
index c5442b7fb7f13..6e656ba942a6b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll
@@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @foo() {
; CHECK-LABEL: define void @foo() {
@@ -21,23 +19,6 @@ define void @foo() {
; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
; CHECK-NEXT: ret void
;
-; FORCED-LABEL: define void @foo() {
-; FORCED-NEXT: bb:
-; FORCED-NEXT: br label [[BB1:%.*]]
-; FORCED: bb1:
-; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ]
-; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], zeroinitializer
-; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer
-; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
-; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]])
-; FORCED-NEXT: br label [[BB4]]
-; FORCED: bb4:
-; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]]
-; FORCED: bb5:
-; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP2]], [[BB4]] ]
-; FORCED-NEXT: ret void
-;
bb:
br label %bb1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
index 93258f2975f34..098a2cd02caed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
@@ -160,9 +160,9 @@ define void @subadd_and_external_users(ptr %A, ptr %ptr) {
; CHECK-NEXT: [[LD:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP1]], <double 1.200000e+00, double 1.100000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], <double 2.200000e+00, double 2.100000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], <double 3.200000e+00, double 3.100000e+00>
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[A:%.*]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
index 6fea312b99b25..8f29f3f8de460 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll
@@ -7,10 +7,13 @@ define i32 @test(i32 %0, i32 %1) {
; CHECK-LABEL: define i32 @test(
; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[DOTNEG_NEG:%.*]] = shl i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i32> [[TMP3]], [[TMP12]]
+; CHECK-NEXT: [[TMP21:%.*]] = shl <2 x i32> [[TMP3]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP21]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[DOTNEG_NEG:%.*]] = shl i32 [[TMP0]], 1
; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], <i32 0, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @st, i64 12), align 4
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr getelementptr inbounds nuw (i8, ptr @st, i64 8), align 8
@@ -21,8 +24,6 @@ define i32 @test(i32 %0, i32 %1) {
; CHECK-NEXT: [[SUB120_3:%.*]] = or i32 [[TMP5]], [[DOTNEG_NEG]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> <i32 1, i32 poison, i32 1, i32 1>, i32 [[TMP0]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = shl <4 x i32> [[TMP10]], <i32 0, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[ADD110]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[DOTNEG_NEG]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = sub <2 x i32> zeroinitializer, [[TMP13]]
; CHECK-NEXT: store <2 x i32> [[TMP14]], ptr getelementptr inbounds nuw (i8, ptr @st, i64 32), align 16
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> <i32 poison, i32 0, i32 poison, i32 poison>, <4 x i32> <i32 1, i32 5, i32 1, i32 poison>
@@ -65,11 +66,11 @@ define i32 @test1(ptr %0, ptr %1, i32 %2) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[ADD53_1:%.*]] = add i32 [[TMP4]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ADD53_1]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[ADD53_1:%.*]] = add i32 [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ADD53_1]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
index 1c4de256468c7..d6edf69882e82 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll
@@ -7,14 +7,14 @@ define void @rftbsub(ptr %a) {
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 2
; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 2, 1
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT: [[SUB22:%.*]] = fsub double undef, undef
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX12]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8
; CHECK-NEXT: [[ADD16:%.*]] = fadd double [[TMP1]], undef
; CHECK-NEXT: [[MUL18:%.*]] = fmul double undef, [[ADD16]]
-; CHECK-NEXT: [[ADD19:%.*]] = fadd double undef, [[MUL18]]
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[ADD19]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[SUB22]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[MUL18]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> undef, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP7]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
; CHECK-NEXT: store <2 x double> [[TMP5]], ptr [[ARRAYIDX6]], align 8
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll b/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll
index 82c8b1d707cf4..62c40cd5810bb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll
@@ -6,12 +6,17 @@ define i32 @test(i32 %arg, i32 %arg1) {
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[BB:.*:]]
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[ARG1]] to i64
-; CHECK-NEXT: [[ZEXT2:%.*]] = zext i32 [[ARG]] to i64
-; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[ARG]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[ARG]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br label %[[BB3:.*]]
; CHECK: [[BB3]]:
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT]]
+; CHECK-NEXT: [[ZEXT2:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT2]]
+; CHECK-NEXT: [[SEXT:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[SEXT]]
; CHECK-NEXT: [[ZEXT6:%.*]] = zext i32 0 to i64
; CHECK-NEXT: [[GETELEMENTPTR7:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ZEXT6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
index b23da5fa263f6..39c7602c95828 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -32,11 +32,12 @@ define <2 x float> @test_frem(float %a, i1 %cmp) {
define <2 x float> @replace_through_casts(i16 %inp) {
; CHECK-LABEL: define <2 x float> @replace_through_casts(
; CHECK-SAME: i16 [[INP:%.*]]) {
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
-; CHECK-NEXT: [[TMP1:%.*]] = uitofp i16 [[INP]] to float
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp i16 [[ADD]] to float
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i64 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], <i16 0, i16 -10>
+; CHECK-NEXT: [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x float>
+; CHECK-NEXT: [[TMP5:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x float>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x float> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -50,10 +51,11 @@ define <2 x float> @replace_through_casts(i16 %inp) {
define <2 x float> @replace_through_casts_and_binop(i16 %inp) {
; CHECK-LABEL: define <2 x float> @replace_through_casts_and_binop(
; CHECK-SAME: i16 [[INP:%.*]]) {
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[INP]], 5
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[MUL]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> [[TMP1]], i16 [[ADD]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <2 x i16> [[TMP6]], <i16 5, i16 -10>
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i16> [[TMP6]], <i16 5, i16 -10>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> [[TMP8]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP3:%.*]] = uitofp <2 x i16> [[TMP2]] to <2 x float>
; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP2]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x i32> <i32 0, i32 3>
@@ -117,11 +119,12 @@ define <2 x float> @replace_through_casts_through_splat(i16 %inp) {
define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts(
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[INP]], -10
-; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[INP]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = sext i16 [[ADD]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], <i16 0, i16 -10>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%add = add nsw i16 %inp, -10
@@ -135,10 +138,11 @@ define <2 x i32> @replace_through_int_casts(i16 %inp, <2 x i16> %dead) {
define <2 x i32> @replace_through_int_casts_ele0_only(i16 %inp, <2 x i16> %dead) {
; CHECK-LABEL: define <2 x i32> @replace_through_int_casts_ele0_only(
; CHECK-SAME: i16 [[INP:%.*]], <2 x i16> [[DEAD:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[INP]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[INP]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> poison, i16 [[INP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%2 = sext i16 %inp to i32
@@ -171,11 +175,12 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d,
define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) {
; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags(
; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) {
-; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5
-; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0
-; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], <i8 0, i8 5>
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i8> [[TMP3]], <i8 123, i8 1>
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i8> [[TMP3]], <i8 123, i8 1>
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%add = xor i8 %inp, 5
diff --git a/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll
index d8021538252c8..b1c1623b070d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll
+++ b/llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll
@@ -1,29 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s %}
-; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-99999\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED %}
define i64 @wombat() {
-; FORCED-LABEL: define i64 @wombat() {
-; FORCED-NEXT: bb:
-; FORCED-NEXT: br label [[BB2:%.*]]
-; FORCED: bb1:
-; FORCED-NEXT: br label [[BB2]]
-; FORCED: bb2:
-; FORCED-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
-; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
-; FORCED-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
-; FORCED-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
-; FORCED-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
-; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
-; FORCED-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
-; FORCED-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
-; FORCED-NEXT: ret i64 [[OR]]
-;
; CHECK-LABEL: define i64 @wombat() {
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB2:%.*]]
@@ -31,8 +10,13 @@ define i64 @wombat() {
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[PHI]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[PHI]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
; CHECK-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
; CHECK-NEXT: ret i64 [[OR]]
;
More information about the llvm-commits mailing list