[llvm] Revert "[SLP]Improve/fix subvectors in gather/buildvector nodes handling" (PR #105780)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 21:21:20 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-systemz
Author: Vitaly Buka (vitalybuka)
<details>
<summary>Changes</summary>
with "[Vectorize] Fix warnings"
It introduced compiler crashes, see #<!-- -->104144.
This reverts commit 69332bb8995aef60d830406de12cb79a50390261 and 351f4a5593f1ef507708ec5eeca165b20add3340..
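
For context, here is a minimal, hypothetical IR-level sketch (not taken from this patch or its tests) of the representation the reverted change introduced: a gather/buildvector node whose vectorizable sub-slices are loaded as narrow vectors and stitched together with llvm.vector.insert (what IRBuilder::CreateInsertVector emits in the removed code paths), instead of one insertelement per scalar.

```llvm
; Hypothetical example: a 4-element gather built from two vectorizable halves.
define <4 x float> @gather_as_subvectors(ptr %a, ptr %b) {
  ; Each contiguous half is vectorized as a <2 x float> load...
  %lo = load <2 x float>, ptr %a, align 4
  %hi = load <2 x float>, ptr %b, align 4
  ; ...and inserted into the wide gather value as a subvector.
  %v0 = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> %lo, i64 0)
  %v1 = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> %v0, <2 x float> %hi, i64 2)
  ret <4 x float> %v1
}

declare <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float>, <2 x float>, i64)
```

The revert drops this subvector bookkeeping (CombinedEntriesWithIndices and the SubVectors argument to finalize) and restores the previous load-grouping cost estimation visible in the hunks below.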
---
Patch is 181.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105780.diff
27 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+183-145)
- (modified) llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll (+37-37)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll (+5-6)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll (+96-96)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll (+218-147)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll (+19-43)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll (+5-2)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll (+4-4)
- (modified) llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll (+13-19)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll (+4-3)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll (+26-22)
- (modified) llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll (+6-5)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/addsub.ll (+10-8)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll (+24-19)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll (+14-13)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll (+7-6)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll (+9-7)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll (+6-12)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll (+9-10)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/phi.ll (+17-19)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll (+8-9)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll (+13-3)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll (+12-14)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/schedule_budget_debug_info.ll (+12-28)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll (+22-17)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll (+3-2)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e8ab6839d9fa87..d7763a022f3b6e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3094,10 +3094,6 @@ class BoUpSLP {
/// The index of this treeEntry in VectorizableTree.
int Idx = -1;
- /// For gather/buildvector/alt opcode (TODO) nodes, which are combined from
- /// other nodes as a series of insertvector instructions.
- SmallVector<std::pair<unsigned, unsigned>, 0> CombinedEntriesWithIndices;
-
private:
/// The operands of each instruction in each lane Operands[op_index][lane].
/// Note: This helps avoid the replication of the code that performs the
@@ -3398,9 +3394,7 @@ class BoUpSLP {
if (!isConstant(V)) {
auto *I = dyn_cast<CastInst>(V);
AllConstsOrCasts &= I && I->getType()->isIntegerTy();
- if (UserTreeIdx.EdgeIdx != UINT_MAX || !UserTreeIdx.UserTE ||
- !UserTreeIdx.UserTE->isGather())
- ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
+ ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
}
if (AllConstsOrCasts)
CastMaxMinBWSizes =
@@ -8355,49 +8349,8 @@ getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
void BoUpSLP::transformNodes() {
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- // The tree may grow here, so iterate over nodes, built before.
- for (unsigned Idx : seq<unsigned>(VectorizableTree.size())) {
- TreeEntry &E = *VectorizableTree[Idx];
- if (E.isGather()) {
- ArrayRef<Value *> VL = E.Scalars;
- const unsigned Sz = getVectorElementSize(VL.front());
- unsigned MinVF = getMinVF(2 * Sz);
- if (VL.size() <= 2 ||
- (E.getOpcode() &&
- (E.isAltShuffle() || E.getOpcode() != Instruction::Load)))
- continue;
- // Try to find vectorizable sequences and transform them into a series of
- // insertvector instructions.
- unsigned StartIdx = 0;
- unsigned End = VL.size();
- for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) {
- for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) {
- ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
- InstructionsState S = getSameOpcode(Slice, *TLI);
- if (!S.getOpcode() || S.isAltShuffle() ||
- (S.getOpcode() != Instruction::Load &&
- any_of(Slice, [&](Value *V) {
- return !areAllUsersVectorized(cast<Instruction>(V),
- UserIgnoreList);
- })))
- continue;
- if (!getTreeEntry(Slice.front()) && !getTreeEntry(Slice.back())) {
- unsigned PrevSize = VectorizableTree.size();
- buildTree_rec(Slice, 0, EdgeInfo(&E, UINT_MAX));
- if (PrevSize + 1 == VectorizableTree.size() &&
- VectorizableTree[PrevSize]->isGather()) {
- VectorizableTree.pop_back();
- continue;
- }
- E.CombinedEntriesWithIndices.emplace_back(PrevSize, Cnt);
- if (StartIdx == Cnt)
- StartIdx = Cnt + VF;
- if (End == Cnt + VF)
- End = Cnt;
- }
- }
- }
- }
+ for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ TreeEntry &E = *TE;
switch (E.getOpcode()) {
case Instruction::Load: {
// No need to reorder masked gather loads, just reorder the scalar
@@ -8520,7 +8473,175 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
auto *VecTy = getWidenedType(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL);
- if (!Root && isSplat(VL)) {
+ // Improve gather cost for gather of loads, if we can group some of the
+ // loads into vector loads.
+ InstructionsState S = getSameOpcode(VL, *R.TLI);
+ const unsigned Sz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned MinVF = R.getMinVF(2 * Sz);
+ if (VL.size() > 2 &&
+ ((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
+ (InVectors.empty() &&
+ any_of(seq<unsigned>(0, VL.size() / MinVF),
+ [&](unsigned Idx) {
+ ArrayRef<Value *> SubVL = VL.slice(Idx * MinVF, MinVF);
+ InstructionsState S = getSameOpcode(SubVL, *R.TLI);
+ return S.getOpcode() == Instruction::Load &&
+ !S.isAltShuffle();
+ }))) &&
+ !all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
+ !isSplat(Gathers)) {
+ InstructionCost BaseCost = R.getGatherCost(Gathers, !Root, ScalarTy);
+ SetVector<Value *> VectorizedLoads;
+ SmallVector<std::pair<unsigned, LoadsState>> VectorizedStarts;
+ SmallVector<unsigned> ScatterVectorized;
+ unsigned StartIdx = 0;
+ unsigned VF = VL.size() / 2;
+ for (; VF >= MinVF; VF /= 2) {
+ for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
+ Cnt += VF) {
+ ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+ if (S.getOpcode() != Instruction::Load || S.isAltShuffle()) {
+ InstructionsState SliceS = getSameOpcode(Slice, *R.TLI);
+ if (SliceS.getOpcode() != Instruction::Load ||
+ SliceS.isAltShuffle())
+ continue;
+ }
+ if (!VectorizedLoads.count(Slice.front()) &&
+ !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
+ SmallVector<Value *> PointerOps;
+ OrdersType CurrentOrder;
+ LoadsState LS = R.canVectorizeLoads(Slice, Slice.front(),
+ CurrentOrder, PointerOps);
+ switch (LS) {
+ case LoadsState::Vectorize:
+ case LoadsState::ScatterVectorize:
+ case LoadsState::StridedVectorize:
+ // Mark the vectorized loads so that we don't vectorize them
+ // again.
+ // TODO: better handling of loads with reorders.
+ if (((LS == LoadsState::Vectorize ||
+ LS == LoadsState::StridedVectorize) &&
+ CurrentOrder.empty()) ||
+ (LS == LoadsState::StridedVectorize &&
+ isReverseOrder(CurrentOrder)))
+ VectorizedStarts.emplace_back(Cnt, LS);
+ else
+ ScatterVectorized.push_back(Cnt);
+ VectorizedLoads.insert(Slice.begin(), Slice.end());
+ // If we vectorized initial block, no need to try to vectorize
+ // it again.
+ if (Cnt == StartIdx)
+ StartIdx += VF;
+ break;
+ case LoadsState::Gather:
+ break;
+ }
+ }
+ }
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= VL.size())
+ break;
+ // Found vectorizable parts - exit.
+ if (!VectorizedLoads.empty())
+ break;
+ }
+ if (!VectorizedLoads.empty()) {
+ unsigned NumParts = TTI.getNumberOfParts(VecTy);
+ bool NeedInsertSubvectorAnalysis =
+ !NumParts || (VL.size() / VF) > NumParts;
+ // Get the cost for gathered loads.
+ for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
+ if (VectorizedLoads.contains(VL[I]))
+ continue;
+ GatherCost +=
+ getBuildVectorCost(VL.slice(I, std::min(End - I, VF)), Root);
+ }
+ // Exclude potentially vectorized loads from list of gathered
+ // scalars.
+ Gathers.assign(Gathers.size(), PoisonValue::get(VL.front()->getType()));
+ // The cost for vectorized loads.
+ InstructionCost ScalarsCost = 0;
+ for (Value *V : VectorizedLoads) {
+ auto *LI = cast<LoadInst>(V);
+ ScalarsCost +=
+ TTI.getMemoryOpCost(Instruction::Load, LI->getType(),
+ LI->getAlign(), LI->getPointerAddressSpace(),
+ CostKind, TTI::OperandValueInfo(), LI);
+ }
+ auto *LoadTy = getWidenedType(VL.front()->getType(), VF);
+ for (const std::pair<unsigned, LoadsState> &P : VectorizedStarts) {
+ auto *LI = cast<LoadInst>(VL[P.first]);
+ Align Alignment = LI->getAlign();
+ GatherCost +=
+ P.second == LoadsState::Vectorize
+ ? TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
+ LI->getPointerAddressSpace(), CostKind,
+ TTI::OperandValueInfo(), LI)
+ : TTI.getStridedMemoryOpCost(
+ Instruction::Load, LoadTy, LI->getPointerOperand(),
+ /*VariableMask=*/false, Alignment, CostKind, LI);
+ // Add external uses costs.
+ for (auto [Idx, V] : enumerate(VL.slice(
+ P.first, std::min<unsigned>(VL.size() - P.first, VF))))
+ if (!R.areAllUsersVectorized(cast<Instruction>(V)))
+ GatherCost += TTI.getVectorInstrCost(Instruction::ExtractElement,
+ LoadTy, CostKind, Idx);
+ // Estimate GEP cost.
+ SmallVector<Value *> PointerOps(VF);
+ for (auto [I, V] : enumerate(VL.slice(P.first, VF)))
+ PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
+ auto [ScalarGEPCost, VectorGEPCost] =
+ getGEPCosts(TTI, PointerOps, LI->getPointerOperand(),
+ Instruction::Load, CostKind, LI->getType(), LoadTy);
+ GatherCost += VectorGEPCost - ScalarGEPCost;
+ }
+ for (unsigned P : ScatterVectorized) {
+ auto *LI0 = cast<LoadInst>(VL[P]);
+ ArrayRef<Value *> Slice = VL.slice(P, VF);
+ Align CommonAlignment = computeCommonAlignment<LoadInst>(Slice);
+ GatherCost += TTI.getGatherScatterOpCost(
+ Instruction::Load, LoadTy, LI0->getPointerOperand(),
+ /*VariableMask=*/false, CommonAlignment, CostKind, LI0);
+ // Estimate GEP cost.
+ SmallVector<Value *> PointerOps(VF);
+ for (auto [I, V] : enumerate(Slice))
+ PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
+ OrdersType Order;
+ if (sortPtrAccesses(PointerOps, LI0->getType(), *R.DL, *R.SE,
+ Order)) {
+ // TODO: improve checks if GEPs can be vectorized.
+ Value *Ptr0 = PointerOps.front();
+ Type *ScalarTy = Ptr0->getType();
+ auto *VecTy = getWidenedType(ScalarTy, VF);
+ auto [ScalarGEPCost, VectorGEPCost] =
+ getGEPCosts(TTI, PointerOps, Ptr0, Instruction::GetElementPtr,
+ CostKind, ScalarTy, VecTy);
+ GatherCost += VectorGEPCost - ScalarGEPCost;
+ if (!Order.empty()) {
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ GatherCost += ::getShuffleCost(TTI, TTI::SK_PermuteSingleSrc,
+ VecTy, Mask, CostKind);
+ }
+ } else {
+ GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true,
+ PointerOps.front()->getType());
+ }
+ }
+ if (NeedInsertSubvectorAnalysis) {
+ // Add the cost for the subvectors insert.
+ SmallVector<int> ShuffleMask(VL.size());
+ for (unsigned I = VF, E = VL.size(); I < E; I += VF) {
+ for (unsigned Idx : seq<unsigned>(0, E))
+ ShuffleMask[Idx] = Idx / VF == I ? E + Idx % VF : Idx;
+ GatherCost += ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy,
+ ShuffleMask, CostKind, I, LoadTy);
+ }
+ }
+ GatherCost -= ScalarsCost;
+ }
+ GatherCost = std::min(BaseCost, GatherCost);
+ } else if (!Root && isSplat(VL)) {
// Found the broadcasting of the single scalar, calculate the cost as
// the broadcast.
const auto *It = find_if_not(VL, IsaPred<UndefValue>);
@@ -9268,9 +9389,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
InstructionCost
- finalize(ArrayRef<int> ExtMask,
- ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
- unsigned VF = 0,
+ finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
if (Action) {
@@ -9288,29 +9407,6 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
Action(V, CommonMask);
InVectors.front() = V;
}
- if (!SubVectors.empty()) {
- const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
- if (InVectors.size() == 2)
- Cost += createShuffle(Vec, InVectors.back(), CommonMask);
- else
- Cost += createShuffle(Vec, nullptr, CommonMask);
- for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (CommonMask[Idx] != PoisonMaskElem)
- CommonMask[Idx] = Idx;
- for (const auto &[E, Idx] : SubVectors) {
- Cost += ::getShuffleCost(
- TTI, TTI::SK_InsertSubvector,
- FixedVectorType::get(ScalarTy, CommonMask.size()), std::nullopt,
- CostKind, Idx,
- FixedVectorType::get(ScalarTy, E->getVectorFactor()));
- if (!CommonMask.empty()) {
- std::iota(std::next(CommonMask.begin(), Idx),
- std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
- Idx);
- }
- }
- }
-
::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
if (CommonMask.empty()) {
assert(InVectors.size() == 1 && "Expected only one vector with no mask");
@@ -12408,9 +12504,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// \param Action the action (if any) to be performed before final applying of
/// the \p ExtMask mask.
Value *
- finalize(ArrayRef<int> ExtMask,
- ArrayRef<std::pair<const TreeEntry *, unsigned>> SubVectors,
- unsigned VF = 0,
+ finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
SmallVector<int> NewExtMask(ExtMask);
@@ -12444,29 +12538,6 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
Action(Vec, CommonMask);
InVectors.front() = Vec;
}
- if (!SubVectors.empty()) {
- Value *Vec = InVectors.front();
- if (InVectors.size() == 2) {
- Vec = createShuffle(Vec, InVectors.back(), CommonMask);
- InVectors.pop_back();
- } else {
- Vec = createShuffle(Vec, nullptr, CommonMask);
- }
- for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (CommonMask[Idx] != PoisonMaskElem)
- CommonMask[Idx] = Idx;
- for (const auto &[E, Idx] : SubVectors) {
- Vec = Builder.CreateInsertVector(
- Vec->getType(), Vec, E->VectorizedValue, Builder.getInt64(Idx));
- if (!CommonMask.empty()) {
- std::iota(std::next(CommonMask.begin(), Idx),
- std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
- Idx);
- }
- }
- InVectors.front() = Vec;
- }
-
if (!ExtMask.empty()) {
if (CommonMask.empty()) {
CommonMask.assign(ExtMask.begin(), ExtMask.end());
@@ -12545,14 +12616,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
: ScalarTy,
Builder, *this);
ShuffleBuilder.add(V, Mask);
- SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
- E->CombinedEntriesWithIndices.size());
- transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
- [&](const auto &P) {
- return std::make_pair(VectorizableTree[P.first].get(),
- P.second);
- });
- return ShuffleBuilder.finalize(std::nullopt, SubVectors);
+ return ShuffleBuilder.finalize(std::nullopt);
};
Value *V = vectorizeTree(VE, PostponedPHIs);
if (VF * getNumElements(VL[0]->getType()) !=
@@ -12635,17 +12699,6 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
SmallVector<int> ReuseShuffleIndices(E->ReuseShuffleIndices.begin(),
E->ReuseShuffleIndices.end());
SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
- // Clear values, to be replaced by insertvector instructions.
- for (const auto &[EIdx, Idx] : E->CombinedEntriesWithIndices)
- for_each(MutableArrayRef(GatheredScalars)
- .slice(Idx, VectorizableTree[EIdx]->getVectorFactor()),
- [&](Value *&V) { V = PoisonValue::get(V->getType()); });
- SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
- E->CombinedEntriesWithIndices.size());
- transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
- [&](const auto &P) {
- return std::make_pair(VectorizableTree[P.first].get(), P.second);
- });
// Build a mask out of the reorder indices and reorder scalars per this
// mask.
SmallVector<int> ReorderMask;
@@ -12783,7 +12836,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
}
ShuffleBuilder.add(*FrontTE, Mask);
- Res = ShuffleBuilder.finalize(E->getCommonMask(), SubVectors);
+ Res = ShuffleBuilder.finalize(E->getCommonMask());
return Res;
}
if (!Resized) {
@@ -13040,10 +13093,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
(IsSingleShuffle && ((IsIdentityShuffle &&
IsNonPoisoned) || IsUsedInExpr) && isa<UndefValue>(V));
}))
- Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices, SubVectors);
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
else
Res = ShuffleBuilder.finalize(
- E->ReuseShuffleIndices, SubVectors, E->Scalars.size(),
+ E->ReuseShuffleIndices, E->Scalars.size(),
[&](Value *&Vec, SmallVectorImpl<int> &Mask) {
TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
Vec = ShuffleBuilder.gather(NonConstants, Mask.size(), Vec);
@@ -13054,7 +13107,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
TryPackScalars(GatheredScalars, ReuseMask, /*IsRootPoison=*/true);
Value *BV = ShuffleBuilder.gather(GatheredScalars, ReuseMask.size());
ShuffleBuilder.add(BV, ReuseMask);
- Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices, SubVectors);
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
} else {
// Gather all constants.
SmallVector<int> Mask(GatheredScalars.size(), PoisonMaskElem);
@@ -13064,7 +13117,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
Value *BV = ShuffleBuilder.gather(GatheredScalars);
ShuffleBuilder.add(BV, Mask);
- Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices, SubVectors);
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
}
if (NeedFreeze)
@@ -13073,8 +13126,6 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
- for (const auto &[EIdx, _] : E->CombinedEntriesWithIndices)
- (void)vectorizeTree(VectorizableTree[EIdx].get(), /*PostponedPHIs=*/false);
return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
Builder, *this);
}
@@ -13126,13 +13177,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
} else {
ShuffleBui...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/105780