[llvm] [SLP]Remove Alternate early profitability checks in favor of throttling (PR #182760)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 22 10:22:21 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
Removes the early profitability check for alternate-opcode nodes, which may
prevent some further optimizations, in favor of tree throttling.
---
Patch is 43.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182760.diff
12 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+4-136)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll (+8-4)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll (+12-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll (+6-20)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll (-21)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll (-19)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll (+2-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/shl-to-add-transformation5.ll (+7-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/small-graph-diff-block-instructions.ll (+7-2)
- (modified) llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll (+28-23)
- (modified) llvm/test/Transforms/SLPVectorizer/ext-int-reduced-not-operand.ll (+7-23)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4caa1707f0f27..0f960fa719a29 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -129,11 +129,6 @@ static cl::opt<int>
cl::desc("Only vectorize if you gain more than this "
"number "));
-static cl::opt<bool> SLPSkipEarlyProfitabilityCheck(
- "slp-skip-early-profitability-check", cl::init(false), cl::Hidden,
- cl::desc("When true, SLP vectorizer bypasses profitability checks based on "
- "heuristics and makes vectorization decision via cost modeling."));
-
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
@@ -4608,15 +4603,6 @@ class slpvectorizer::BoUpSLP {
return nullptr;
}
- /// Check that the operand node of alternate node does not generate
- /// buildvector sequence. If it is, then probably not worth it to build
- /// alternate shuffle, if number of buildvector operands + alternate
- /// instruction > than the number of buildvector instructions.
- /// \param S the instructions state of the analyzed values.
- /// \param VL list of the instructions with alternate opcodes.
- bool areAltOperandsProfitable(const InstructionsState &S,
- ArrayRef<Value *> VL) const;
-
/// Contains all the outputs of legality analysis for a list of values to
/// vectorize.
class ScalarsVectorizationLegality {
@@ -10244,120 +10230,6 @@ static std::pair<size_t, size_t> generateKeySubkey(
static bool isMainInstruction(Instruction *I, Instruction *MainOp,
Instruction *AltOp, const TargetLibraryInfo &TLI);
-bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
- ArrayRef<Value *> VL) const {
- Type *ScalarTy = S.getMainOp()->getType();
- unsigned Opcode0 = S.getOpcode();
- unsigned Opcode1 = S.getAltOpcode();
- SmallBitVector OpcodeMask(getAltInstrMask(VL, ScalarTy, Opcode0, Opcode1));
- // If this pattern is supported by the target then consider it profitable.
- if (TTI->isLegalAltInstr(getWidenedType(ScalarTy, VL.size()), Opcode0,
- Opcode1, OpcodeMask))
- return true;
- SmallVector<ValueList> Operands;
- for (unsigned I : seq<unsigned>(S.getMainOp()->getNumOperands())) {
- Operands.emplace_back();
- // Prepare the operand vector.
- for (Value *V : VL) {
- if (isa<PoisonValue>(V)) {
- Operands.back().push_back(
- PoisonValue::get(S.getMainOp()->getOperand(I)->getType()));
- continue;
- }
- Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
- }
- }
- if (Operands.size() == 2) {
- // Try find best operands candidates.
- for (unsigned I : seq<unsigned>(0, VL.size() - 1)) {
- SmallVector<std::pair<Value *, Value *>> Candidates(3);
- Candidates[0] = std::make_pair(Operands[0][I], Operands[0][I + 1]);
- Candidates[1] = std::make_pair(Operands[0][I], Operands[1][I + 1]);
- Candidates[2] = std::make_pair(Operands[1][I], Operands[0][I + 1]);
- std::optional<int> Res = findBestRootPair(Candidates);
- switch (Res.value_or(0)) {
- case 0:
- break;
- case 1:
- std::swap(Operands[0][I + 1], Operands[1][I + 1]);
- break;
- case 2:
- std::swap(Operands[0][I], Operands[1][I]);
- break;
- default:
- llvm_unreachable("Unexpected index.");
- }
- }
- }
- DenseSet<unsigned> UniqueOpcodes;
- constexpr unsigned NumAltInsts = 3; // main + alt + shuffle.
- unsigned NonInstCnt = 0;
- // Estimate number of instructions, required for the vectorized node and for
- // the buildvector node.
- unsigned UndefCnt = 0;
- // Count the number of extra shuffles, required for vector nodes.
- unsigned ExtraShuffleInsts = 0;
- // Check that operands do not contain same values and create either perfect
- // diamond match or shuffled match.
- if (Operands.size() == 2) {
- // Do not count same operands twice.
- if (Operands.front() == Operands.back()) {
- Operands.erase(Operands.begin());
- } else if (!allConstant(Operands.front()) &&
- all_of(Operands.front(), [&](Value *V) {
- return is_contained(Operands.back(), V);
- })) {
- Operands.erase(Operands.begin());
- ++ExtraShuffleInsts;
- }
- }
- const Loop *L = LI->getLoopFor(S.getMainOp()->getParent());
- // Vectorize node, if:
- // 1. at least single operand is constant or splat.
- // 2. Operands have many loop invariants (the instructions are not loop
- // invariants).
- // 3. At least single unique operands is supposed to vectorized.
- return none_of(Operands,
- [&](ArrayRef<Value *> Op) {
- if (allConstant(Op) ||
- (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) &&
- getSameOpcode(Op, *TLI)))
- return false;
- DenseMap<Value *, unsigned> Uniques;
- for (Value *V : Op) {
- if (isa<Constant, ExtractElementInst>(V) ||
- isVectorized(V) || (L && L->isLoopInvariant(V))) {
- if (isa<UndefValue>(V))
- ++UndefCnt;
- continue;
- }
- auto Res = Uniques.try_emplace(V, 0);
- // Found first duplicate - need to add shuffle.
- if (!Res.second && Res.first->second == 1)
- ++ExtraShuffleInsts;
- ++Res.first->getSecond();
- if (auto *I = dyn_cast<Instruction>(V))
- UniqueOpcodes.insert(I->getOpcode());
- else if (Res.second)
- ++NonInstCnt;
- }
- return none_of(Uniques, [&](const auto &P) {
- return P.first->hasNUsesOrMore(P.second + 1) &&
- none_of(P.first->users(), [&](User *U) {
- return isVectorized(U) || Uniques.contains(U);
- });
- });
- }) ||
- // Do not vectorize node, if estimated number of vector instructions is
- // more than estimated number of buildvector instructions. Number of
- // vector operands is number of vector instructions + number of vector
- // instructions for operands (buildvectors). Number of buildvector
- // instructions is just number_of_operands * number_of_scalars.
- (UndefCnt < (VL.size() - 1) * S.getMainOp()->getNumOperands() &&
- (UniqueOpcodes.size() + NonInstCnt + ExtraShuffleInsts +
- NumAltInsts) < S.getMainOp()->getNumOperands() * VL.size());
-}
-
/// Builds the arguments types vector for the given call instruction with the
/// given \p ID for the specified vector factor.
static SmallVector<Type *>
@@ -10810,13 +10682,6 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return TreeEntry::NeedToGather;
}
- if (!SLPSkipEarlyProfitabilityCheck && !areAltOperandsProfitable(S, VL)) {
- LLVM_DEBUG(
- dbgs()
- << "SLP: ShuffleVector not vectorized, operands are buildvector and "
- "the whole alt sequence is not profitable.\n");
- return TreeEntry::NeedToGather;
- }
return TreeEntry::Vectorize;
}
@@ -17247,6 +17112,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
auto It = MinBWs.find(TE);
if (It != MinBWs.end())
ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
+ if (isa<CmpInst>(TE->Scalars.front()))
+ ScalarTy = TE->Scalars.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
const unsigned EntryVF = TE->getVectorFactor();
auto *FinalVecTy = getWidenedType(ScalarTy, EntryVF);
@@ -17275,7 +17142,8 @@ InstructionCost BoUpSLP::calculateTreeCostAndTrimNonProfitable(
::getShuffleCost(*TTI, TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
// If all scalars are reused in gather node(s) or other vector nodes, there
// might be extra cost for inserting them.
- if (all_of(TE->Scalars, [&](Value *V) {
+ if ((!TE->hasState() || !TE->isAltShuffle()) &&
+ all_of(TE->Scalars, [&](Value *V) {
return (TE->hasCopyableElements() && TE->isCopyableElement(V)) ||
isConstant(V) || isGathered(V) || getTreeEntries(V).size() > 1;
}))
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
index 38705032ce1c9..77a1c812c52a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/externally-used-copyables.ll
@@ -34,6 +34,13 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP80:%.*]] = insertelement <64 x i64> <i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 1, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison, i64 poison>, i64 [[TMP1]], i32 11
; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> poison, <28 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <14 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i64> [[TMP45]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <2 x i64> [[TMP85]], <2 x i64> <i64 poison, i64 1>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP87:%.*]] = mul <2 x i64> [[TMP85]], [[TMP86]]
+; CHECK-NEXT: [[TMP88:%.*]] = or <2 x i64> [[TMP85]], [[TMP86]]
+; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x i64> [[TMP87]], <2 x i64> [[TMP88]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <2 x i64> [[TMP89]], <2 x i64> poison, <64 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: br label %[[DOTLR_PH1977_US:.*]]
; CHECK: [[_LR_PH1977_US:.*:]]
; CHECK-NEXT: [[INDVAR37888:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER:%.*]] ], [ 1, %[[DOTLR_PH1977_US]] ]
@@ -41,15 +48,12 @@ define void @test(i64 %0, i64 %1, i64 %2, i64 %3, i64 %.sroa.3341.0.copyload, i6
; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3, i32 3>
; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i64> [[TMP20]], [[TMP31]]
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = mul i64 [[TMP0]], [[TMP0]]
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP40:%.*]] = or <2 x i64> [[TMP42]], splat (i64 1)
; CHECK-NEXT: [[TMP41:%.*]] = shl <2 x i64> [[TMP42]], splat (i64 1)
; CHECK-NEXT: [[TMP39:%.*]] = mul i64 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[TMP43:%.*]] = add <8 x i64> [[TMP35]], [[TMP25]]
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <64 x i64> [[TMP80]], i64 [[INDVAR37888]], i32 1
-; CHECK-NEXT: [[TMP45:%.*]] = insertelement <64 x i64> [[TMP44]], i64 [[TMP27]], i32 2
-; CHECK-NEXT: [[TMP46:%.*]] = insertelement <64 x i64> [[TMP45]], i64 [[TMP30]], i32 3
+; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <64 x i64> [[TMP44]], <64 x i64> [[TMP90]], <64 x i32> <i32 poison, i32 1, i32 64, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> poison, <64 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i64> [[TMP34]], <4 x i64> poison, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <64 x i64> [[TMP46]], <64 x i64> [[TMP48]], <64 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 11, i32 poison, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 66, i32 27, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
index fc805b226d3b7..06c4bc205adf0 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/shuffled-gather-casted.ll
@@ -59,12 +59,20 @@ define i32 @test1(ptr %p) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[D_0:%.*]] = load i16, ptr [[P]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 poison, i16 0, i16 0, i16 0>, i16 [[D_0]], i32 0
+; CHECK-NEXT: [[SZERO_2:%.*]] = sext i16 -1 to i32
+; CHECK-NEXT: [[UZERO_1:%.*]] = zext i16 -1 to i32
+; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[UZERO_1]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[SZERO_2]], i32 3
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> <i32 -1, i32 -16383, i32 undef, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP2]], <i32 -1, i32 -16383, i32 65535, i32 -1>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -16383, i32 65535, i32 -1>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[TMP4]], <i32 65535, i32 -16383, i32 65535, i32 65535>
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> <i32 4, i32 3, i32 2, i32 1>
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> [[TMP10]], <4 x i16> <i16 4, i16 3, i16 2, i16 1>
+; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: ret i32 [[TMP8]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
index f2ea2df7cc982..78fc3a60f0514 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll
@@ -1,31 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s
-; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\
-; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED
define void @test(i16 %0) {
-; FORCED-LABEL: @test(
-; FORCED-NEXT: for.body92.preheader:
-; FORCED-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1
-; FORCED-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
-; FORCED-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
-; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182760
More information about the llvm-commits mailing list