[llvm] 4a1a697 - [SLP][NFC]Unify ScalarToTreeEntries and MultiNodeScalars, NFC
Wed Jan 29 06:05:58 PST 2025
Author: Alexey Bataev
Date: 2025-01-29T09:05:54-05:00
New Revision: 4a1a697427bbec4a43f499c7b7aa38a05bf05727
URL: https://github.com/llvm/llvm-project/commit/4a1a697427bbec4a43f499c7b7aa38a05bf05727
DIFF: https://github.com/llvm/llvm-project/commit/4a1a697427bbec4a43f499c7b7aa38a05bf05727.diff
LOG: [SLP][NFC]Unify ScalarToTreeEntries and MultiNodeScalars, NFC
Currently, SLP has two distinct storages to manage the mapping between
vectorized instructions and their corresponding vectorized TreeEntry
nodes. This leads to inefficient lookup of the matching TreeEntries and
makes it harder to correctly track instructions associated with
multiple nodes.
There is a plan to extend this support to instructions that require
scheduling, to allow support for copyable elements. Merging
ScalarToTreeEntry and MultiNodeScalars reduces the maintenance burden
of that feature.
Reviewers: RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/124914
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2532edc5d86990..4204f35d1a20d6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1476,8 +1476,7 @@ class BoUpSLP {
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
VectorizableTree.clear();
- ScalarToTreeEntry.clear();
- MultiNodeScalars.clear();
+ ScalarToTreeEntries.clear();
MustGather.clear();
NonScheduledFirst.clear();
EntryToLastInstruction.clear();
@@ -1760,7 +1759,7 @@ class BoUpSLP {
auto AllUsersVectorized = [U1, U2, this](Value *V) {
return llvm::all_of(V->users(), [U1, U2, this](Value *U) {
- return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr;
+ return U == U1 || U == U2 || R.isVectorized(U);
});
};
return AllUsersVectorized(V1) && AllUsersVectorized(V2);
@@ -1776,9 +1775,13 @@ class BoUpSLP {
}
auto CheckSameEntryOrFail = [&]() {
- if (const TreeEntry *TE1 = R.getTreeEntry(V1);
- TE1 && TE1 == R.getTreeEntry(V2))
- return LookAheadHeuristics::ScoreSplatLoads;
+ if (ArrayRef<TreeEntry *> TEs1 = R.getTreeEntries(V1); !TEs1.empty()) {
+ SmallPtrSet<TreeEntry *, 4> Set(TEs1.begin(), TEs1.end());
+ if (ArrayRef<TreeEntry *> TEs2 = R.getTreeEntries(V2);
+ !TEs2.empty() &&
+ any_of(TEs2, [&](TreeEntry *E) { return Set.contains(E); }))
+ return LookAheadHeuristics::ScoreSplatLoads;
+ }
return LookAheadHeuristics::ScoreFail;
};
@@ -2851,13 +2854,7 @@ class BoUpSLP {
continue;
auto *I = cast<Instruction>(V);
salvageDebugInfo(*I);
- SmallVector<const TreeEntry *> Entries;
- if (const TreeEntry *Entry = getTreeEntry(I)) {
- Entries.push_back(Entry);
- auto It = MultiNodeScalars.find(I);
- if (It != MultiNodeScalars.end())
- Entries.append(It->second.begin(), It->second.end());
- }
+ ArrayRef<TreeEntry *> Entries = getTreeEntries(I);
for (Use &U : I->operands()) {
if (auto *OpI = dyn_cast_if_present<Instruction>(U.get());
OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() &&
@@ -2961,7 +2958,10 @@ class BoUpSLP {
}
/// Check if the value is vectorized in the tree.
- bool isVectorized(Value *V) const { return getTreeEntry(V); }
+ bool isVectorized(Value *V) const {
+ assert(V && "V cannot be nullptr.");
+ return ScalarToTreeEntries.contains(V);
+ }
~BoUpSLP();
@@ -2999,16 +2999,10 @@ class BoUpSLP {
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
TreeEntry *TE = nullptr;
const auto *It = find_if(VL, [&](Value *V) {
- TE = getTreeEntry(V);
- if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
- return true;
- auto It = MultiNodeScalars.find(V);
- if (It != MultiNodeScalars.end()) {
- for (TreeEntry *E : It->second) {
- if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
- TE = E;
- return true;
- }
+ for (TreeEntry *E : getTreeEntries(V)) {
+ if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
+ TE = E;
+ return true;
}
}
return false;
@@ -3659,18 +3653,24 @@ class BoUpSLP {
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
}
if (!Last->isGather()) {
+ SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
- const TreeEntry *TE = getTreeEntry(V);
- assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
- "Scalar already in tree!");
- if (TE) {
- if (TE != Last)
- MultiNodeScalars.try_emplace(V).first->getSecond().push_back(Last);
- continue;
+ auto It = ScalarToTreeEntries.find(V);
+ assert(
+ (It == ScalarToTreeEntries.end() ||
+ (It->getSecond().size() == 1 && It->getSecond().front() == Last) ||
+ doesNotNeedToBeScheduled(V)) &&
+ "Scalar already in tree!");
+ if (It == ScalarToTreeEntries.end()) {
+ ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
+ (void)Processed.insert(V);
+ } else if (Processed.insert(V).second) {
+ assert(!is_contained(It->getSecond(), Last) &&
+ "Value already associated with the node.");
+ It->getSecond().push_back(Last);
}
- ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
ScheduleData *BundleMember = *Bundle;
@@ -3725,14 +3725,23 @@ class BoUpSLP {
}
#endif
- TreeEntry *getTreeEntry(Value *V) {
+ /// Get list of vector entries, associated with the value \p V.
+ ArrayRef<TreeEntry *> getTreeEntries(Value *V) const {
assert(V && "V cannot be nullptr.");
- return ScalarToTreeEntry.lookup(V);
+ auto It = ScalarToTreeEntries.find(V);
+ if (It == ScalarToTreeEntries.end())
+ return {};
+ return It->getSecond();
}
- const TreeEntry *getTreeEntry(Value *V) const {
+ /// Returns first vector node for value \p V, matching values \p VL.
+ TreeEntry *getSameValuesTreeEntry(Value *V, ArrayRef<Value *> VL,
+ bool SameVF = false) const {
assert(V && "V cannot be nullptr.");
- return ScalarToTreeEntry.lookup(V);
+ for (TreeEntry *TE : ScalarToTreeEntries.lookup(V))
+ if ((!SameVF || TE->getVectorFactor() == VL.size()) && TE->isSame(VL))
+ return TE;
+ return nullptr;
}
/// Check that the operand node of alternate node does not generate
@@ -3752,12 +3761,8 @@ class BoUpSLP {
OrdersType &CurrentOrder,
SmallVectorImpl<Value *> &PointerOps);
- /// Maps a specific scalar to its tree entry.
- SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
-
- /// List of scalars, used in several vectorize nodes, and the list of the
- /// nodes.
- SmallDenseMap<Value *, SmallVector<TreeEntry *>> MultiNodeScalars;
+ /// Maps a specific scalar to its tree entry(ies).
+ SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
@@ -3798,16 +3803,19 @@ class BoUpSLP {
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
- ExternalUser(Value *S, llvm::User *U, int L)
- : Scalar(S), User(U), Lane(L) {}
+ ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L)
+ : Scalar(S), User(U), E(E), Lane(L) {}
+
+ /// Which scalar in our function.
+ Value *Scalar = nullptr;
- // Which scalar in our function.
- Value *Scalar;
+ /// Which user that uses the scalar.
+ llvm::User *User = nullptr;
- // Which user that uses the scalar.
- llvm::User *User;
+ /// Vector node, the value is part of.
+ const TreeEntry &E;
- // Which lane does the scalar belong to.
+ /// Which lane does the scalar belong to.
int Lane;
};
using UserList = SmallVector<ExternalUser, 16>;
@@ -5113,7 +5121,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
auto IsAnyPointerUsedOutGraph =
IsPossibleStrided && any_of(PointerOps, [&](Value *V) {
return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
- return !getTreeEntry(U) && !MustGather.contains(U);
+ return !isVectorized(U) && !MustGather.contains(U);
});
});
const unsigned AbsoluteDiff = std::abs(*Diff);
@@ -6572,7 +6580,7 @@ void BoUpSLP::buildExternalUses(
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
<< FoundLane << " from " << *Scalar << ".\n");
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
- ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
+ ExternalUses.emplace_back(Scalar, nullptr, *Entry, FoundLane);
continue;
}
for (User *U : Scalar->users()) {
@@ -6587,16 +6595,24 @@ void BoUpSLP::buildExternalUses(
continue;
// Skip in-tree scalars that become vectors
- if (TreeEntry *UseEntry = getTreeEntry(U)) {
+ if (ArrayRef<TreeEntry *> UseEntries = getTreeEntries(U);
+ !UseEntries.empty()) {
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
- if (UseEntry->State == TreeEntry::ScatterVectorize ||
- !doesInTreeUserNeedToExtract(
- Scalar, getRootEntryInstruction(*UseEntry), TLI, TTI)) {
+ if (any_of(UseEntries, [&](TreeEntry *UseEntry) {
+ return UseEntry->State == TreeEntry::ScatterVectorize ||
+ !doesInTreeUserNeedToExtract(
+ Scalar, getRootEntryInstruction(*UseEntry), TLI,
+ TTI);
+ })) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
- assert(!UseEntry->isGather() && "Bad state");
+ assert(none_of(UseEntries,
+ [](TreeEntry *UseEntry) {
+ return UseEntry->isGather();
+ }) &&
+ "Bad state");
continue;
}
U = nullptr;
@@ -6613,7 +6629,7 @@ void BoUpSLP::buildExternalUses(
<< " from lane " << FoundLane << " from " << *Scalar
<< ".\n");
It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
- ExternalUses.emplace_back(Scalar, U, FoundLane);
+ ExternalUses.emplace_back(Scalar, U, *Entry, FoundLane);
if (!U)
break;
}
@@ -6644,7 +6660,7 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
!isValidElementType(SI->getValueOperand()->getType()))
continue;
// Skip entry if already
- if (getTreeEntry(U))
+ if (isVectorized(U))
continue;
Value *Ptr =
@@ -7027,10 +7043,11 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (User *U : LI->users()) {
if (auto *UI = dyn_cast<Instruction>(U); UI && isDeleted(UI))
continue;
- if (const TreeEntry *UTE = getTreeEntry(U)) {
+ for (const TreeEntry *UTE : getTreeEntries(U)) {
for (int I : seq<int>(UTE->getNumOperands())) {
- if (all_of(UTE->getOperand(I),
- [LI](Value *V) { return V == LI; }))
+ if (all_of(UTE->getOperand(I), [LI](Value *V) {
+ return V == LI || isa<PoisonValue>(V);
+ }))
// Found legal broadcast - do not vectorize.
return false;
}
@@ -7135,7 +7152,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
int LastDist = LocalLoadsDists.front().second;
bool AllowMaskedGather = IsMaskedGatherSupported(OriginalLoads);
for (const std::pair<LoadInst *, int> &L : LocalLoadsDists) {
- if (getTreeEntry(L.first))
+ if (isVectorized(L.first))
continue;
assert(LastDist >= L.second &&
"Expected first distance always not less than second");
@@ -7187,9 +7204,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (auto [Slice, _] : Results) {
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads ("
<< Slice.size() << ")\n");
- if (any_of(Slice, [&](Value *V) { return getTreeEntry(V); })) {
+ if (any_of(Slice, [&](Value *V) { return isVectorized(V); })) {
for (Value *L : Slice)
- if (!getTreeEntry(L))
+ if (!isVectorized(L))
SortedNonVectorized.push_back(cast<LoadInst>(L));
continue;
}
@@ -7228,7 +7245,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
any_of(E->Scalars, [&, Slice = Slice](Value *V) {
if (isa<Constant>(V))
return false;
- if (getTreeEntry(V))
+ if (isVectorized(V))
return true;
const auto &Nodes = ValueToGatherNodes.at(V);
return (Nodes.size() != 1 || !Nodes.contains(E)) &&
@@ -7315,7 +7332,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (unsigned I = 0, E = Slice.size(); I < E; I += VF) {
ArrayRef<Value *> SubSlice =
Slice.slice(I, std::min(VF, E - I));
- if (getTreeEntry(SubSlice.front()))
+ if (isVectorized(SubSlice.front()))
continue;
// Check if the subslice is to be-vectorized entry, which is not
// equal to entry.
@@ -7585,7 +7602,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
if (isa<Constant, ExtractElementInst>(V) ||
- getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
+ isVectorized(V) || (L && L->isLoopInvariant(V))) {
if (isa<UndefValue>(V))
++UndefCnt;
continue;
@@ -7603,7 +7620,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
return none_of(Uniques, [&](const auto &P) {
return P.first->hasNUsesOrMore(P.second + 1) &&
none_of(P.first->users(), [&](User *U) {
- return getTreeEntry(U) || Uniques.contains(U);
+ return isVectorized(U) || Uniques.contains(U);
});
});
}) ||
@@ -8167,59 +8184,25 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check if this is a duplicate of another entry.
if (S) {
- if (TreeEntry *E = getTreeEntry(S.getMainOp())) {
- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp()
- << ".\n");
- if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) {
- auto It = MultiNodeScalars.find(S.getMainOp());
- if (It != MultiNodeScalars.end()) {
- auto *TEIt = find_if(It->getSecond(),
- [&](TreeEntry *ME) { return ME->isSame(VL); });
- if (TEIt != It->getSecond().end())
- E = *TEIt;
- else
- E = nullptr;
- } else {
- E = nullptr;
- }
- }
- if (!E) {
- if (!doesNotNeedToBeScheduled(S.getMainOp())) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
- SmallPtrSet<const TreeEntry *, 4> Nodes;
- Nodes.insert(getTreeEntry(S.getMainOp()));
- for (const TreeEntry *E : MultiNodeScalars.lookup(S.getMainOp()))
- Nodes.insert(E);
- SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
- if (any_of(Nodes, [&](const TreeEntry *E) {
- if (all_of(E->Scalars,
- [&](Value *V) { return Values.contains(V); }))
- return true;
- SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
- E->Scalars.end());
- return (
- all_of(VL, [&](Value *V) { return EValues.contains(V); }));
- })) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
- } else {
- // Record the reuse of the tree node. FIXME, currently this is only
- // used to properly draw the graph rather than for the actual
- // vectorization.
+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
+ for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
+ if (E->isSame(VL)) {
+ // Record the reuse of the tree node.
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
<< ".\n");
return;
}
+ SmallPtrSet<Value *, 8> Values(E->Scalars.begin(), E->Scalars.end());
+ if (all_of(VL, [&](Value *V) {
+ return isa<PoisonValue>(V) || Values.contains(V);
+ })) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ return;
+ }
}
}
@@ -8371,7 +8354,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
doesNotNeedToBeScheduled(V))
continue;
- if (getTreeEntry(V)) {
+ if (isVectorized(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
if (TryToFindDuplicates(S))
@@ -9029,8 +9012,7 @@ bool BoUpSLP::areAllUsersVectorized(
Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const {
return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) ||
all_of(I->users(), [this](User *U) {
- return ScalarToTreeEntry.contains(U) ||
- isVectorLikeInstWithConstOps(U) ||
+ return isVectorized(U) || isVectorLikeInstWithConstOps(U) ||
(isa<ExtractElementInst>(U) && MustGather.contains(U));
});
}
@@ -9844,13 +9826,9 @@ void BoUpSLP::transformNodes() {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
// If any instruction is vectorized already - do not try again.
// Reuse the existing node, if it fully matches the slice.
- if (const TreeEntry *SE = getTreeEntry(Slice.front());
- SE || getTreeEntry(Slice.back())) {
- if (!SE)
- continue;
- if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
- continue;
- }
+ if (isVectorized(Slice.front()) &&
+ !getSameValuesTreeEntry(Slice.front(), Slice, /*SameVF=*/true))
+ continue;
// Constant already handled effectively - skip.
if (allConstant(Slice))
continue;
@@ -9933,12 +9911,8 @@ void BoUpSLP::transformNodes() {
for (auto [Cnt, Sz] : Slices) {
ArrayRef<Value *> Slice = VL.slice(Cnt, Sz);
// If any instruction is vectorized already - do not try again.
- if (TreeEntry *SE = getTreeEntry(Slice.front());
- SE || getTreeEntry(Slice.back())) {
- if (!SE)
- continue;
- if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
- continue;
+ if (TreeEntry *SE = getSameValuesTreeEntry(Slice.front(), Slice,
+ /*SameVF=*/true)) {
SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
AddCombinedNode(SE->Idx, Cnt, Sz);
continue;
@@ -10724,7 +10698,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
auto *EE = cast<ExtractElementInst>(V);
VecBase = EE->getVectorOperand();
UniqueBases.insert(VecBase);
- const TreeEntry *VE = R.getTreeEntry(V);
+ ArrayRef<TreeEntry *> VEs = R.getTreeEntries(V);
if (!CheckedExtracts.insert(V).second ||
!R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
any_of(EE->users(),
@@ -10733,7 +10707,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
!R.areAllUsersVectorized(cast<Instruction>(U),
&VectorizedVals);
}) ||
- (VE && VE != E))
+ (!VEs.empty() && !is_contained(VEs, E)))
continue;
std::optional<unsigned> EEIdx = getExtractIndex(EE);
if (!EEIdx)
@@ -11166,13 +11140,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
const unsigned Sz = UniqueValues.size();
SmallBitVector UsedScalars(Sz, false);
for (unsigned I = 0; I < Sz; ++I) {
- if (isa<Instruction>(UniqueValues[I]) && getTreeEntry(UniqueValues[I]) == E)
+ if (isa<Instruction>(UniqueValues[I]) &&
+ is_contained(getTreeEntries(UniqueValues[I]), E))
continue;
UsedScalars.set(I);
}
auto GetCastContextHint = [&](Value *V) {
- if (const TreeEntry *OpTE = getTreeEntry(V))
- return getCastContextHint(*OpTE);
+ if (ArrayRef<TreeEntry *> OpTEs = getTreeEntries(V); OpTEs.size() == 1)
+ return getCastContextHint(*OpTEs.front());
InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI);
if (SrcState && SrcState.getOpcode() == Instruction::Load &&
!SrcState.isAltShuffle())
@@ -11294,11 +11269,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
Value *Op = PHI->getIncomingValue(I);
Operands[I] = Op;
}
- if (const TreeEntry *OpTE = getTreeEntry(Operands.front()))
- if (OpTE->isSame(Operands) && CountedOps.insert(OpTE).second)
- if (!OpTE->ReuseShuffleIndices.empty())
- ScalarCost += TTI::TCC_Basic * (OpTE->ReuseShuffleIndices.size() -
- OpTE->Scalars.size());
+ if (const TreeEntry *OpTE =
+ getSameValuesTreeEntry(Operands.front(), Operands))
+ if (CountedOps.insert(OpTE).second &&
+ !OpTE->ReuseShuffleIndices.empty())
+ ScalarCost += TTI::TCC_Basic * (OpTE->ReuseShuffleIndices.size() -
+ OpTE->Scalars.size());
}
return CommonCost - ScalarCost;
@@ -12231,7 +12207,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
// Update LiveValues.
LiveValues.erase(PrevInst);
for (auto &J : PrevInst->operands()) {
- if (isa<Instruction>(&*J) && getTreeEntry(&*J))
+ if (isa<Instruction>(&*J) && isVectorized(&*J))
LiveValues.insert(cast<Instruction>(&*J));
}
@@ -12478,9 +12454,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
continue;
}
if (TE.isGather() && TE.hasState()) {
- if (const TreeEntry *E = getTreeEntry(TE.getMainOp());
- E && E->getVectorFactor() == TE.getVectorFactor() &&
- E->isSame(TE.Scalars)) {
+ if (const TreeEntry *E =
+ getSameValuesTreeEntry(TE.getMainOp(), TE.Scalars);
+ E && E->getVectorFactor() == TE.getVectorFactor()) {
// Some gather nodes might be absolutely the same as some vectorizable
// nodes after reordering, need to handle it.
LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle "
@@ -12552,7 +12528,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
continue;
std::optional<unsigned> InsertIdx = getElementIndex(VU);
if (InsertIdx) {
- const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
+ const TreeEntry *ScalarTE = &EU.E;
auto *It = find_if(
ShuffledInserts,
[this, VU](const ShuffledInsertData<const TreeEntry *> &Data) {
@@ -12561,7 +12537,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
return areTwoInsertFromSameBuildVector(
VU, VecInsert, [this](InsertElementInst *II) -> Value * {
Value *Op0 = II->getOperand(0);
- if (getTreeEntry(II) && !getTreeEntry(Op0))
+ if (isVectorized(II) && !isVectorized(Op0))
return nullptr;
return Op0;
});
@@ -12619,7 +12595,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// for the extract and the added cost of the sign extend if needed.
InstructionCost ExtraCost = TTI::TCC_Free;
auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth);
- const TreeEntry *Entry = getTreeEntry(EU.Scalar);
+ const TreeEntry *Entry = &EU.E;
auto It = MinBWs.find(Entry);
if (It != MinBWs.end()) {
auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
@@ -12662,7 +12638,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
auto *Inst = cast<Instruction>(EU.Scalar);
InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind);
auto OperandIsScalar = [&](Value *V) {
- if (!getTreeEntry(V)) {
+ if (!isVectorized(V)) {
// Some extractelements might be not vectorized, but
// transformed into shuffle and removed from the function,
// consider it here.
@@ -12678,7 +12654,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
if (auto *Op = dyn_cast<Instruction>(CI->getOperand(0));
Op && all_of(Op->operands(), OperandIsScalar)) {
InstructionCost OpCost =
- (getTreeEntry(Op) && !ValueToExtUses->contains(Op))
+ (isVectorized(Op) && !ValueToExtUses->contains(Op))
? TTI->getInstructionCost(Op, CostKind)
: 0;
if (ScalarCost + OpCost <= ExtraCost) {
@@ -12705,7 +12681,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
cast<Instruction>(
VectorizableTree.front()->getMainOp())
->getParent()) &&
- !getTreeEntry(U);
+ !isVectorized(U);
}) &&
count_if(Entry->Scalars, [&](Value *V) {
return ValueToExtUses->contains(V);
@@ -12767,8 +12743,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// instead of extractelement.
for (Value *V : ScalarOpsFromCasts) {
ExternalUsesAsOriginalScalar.insert(V);
- if (const TreeEntry *E = getTreeEntry(V)) {
- ExternalUses.emplace_back(V, nullptr, E->findLaneForValue(V));
+ if (ArrayRef<TreeEntry *> TEs = getTreeEntries(V); !TEs.empty()) {
+ ExternalUses.emplace_back(V, nullptr, *TEs.front(),
+ TEs.front()->findLaneForValue(V));
}
}
// Add reduced value cost, if resized.
@@ -13188,21 +13165,18 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
VToTEs.insert(TEPtr);
}
- if (const TreeEntry *VTE = getTreeEntry(V)) {
- if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0)) {
- if (VTE->State != TreeEntry::Vectorize) {
- auto It = MultiNodeScalars.find(V);
- if (It == MultiNodeScalars.end())
- continue;
- VTE = *It->getSecond().begin();
- // Iterate through all vectorized nodes.
- auto *MIt = find_if(It->getSecond(), [](const TreeEntry *MTE) {
- return MTE->State == TreeEntry::Vectorize;
- });
- if (MIt == It->getSecond().end())
- continue;
- VTE = *MIt;
- }
+ if (ArrayRef<TreeEntry *> VTEs = getTreeEntries(V); !VTEs.empty()) {
+ const TreeEntry *VTE = VTEs.front();
+ if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0) &&
+ VTEs.size() > 1 && VTE->State != TreeEntry::Vectorize) {
+ VTEs = VTEs.drop_front();
+ // Iterate through all vectorized nodes.
+ const auto *MIt = find_if(VTEs, [](const TreeEntry *MTE) {
+ return MTE->State == TreeEntry::Vectorize;
+ });
+ if (MIt == VTEs.end())
+ continue;
+ VTE = *MIt;
}
if (none_of(TE->CombinedEntriesWithIndices,
[&](const auto &P) { return P.first == VTE->Idx; })) {
@@ -13366,7 +13340,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
// by extractelements processing) or may form vector node in future.
auto MightBeIgnored = [=](Value *V) {
auto *I = dyn_cast<Instruction>(V);
- return I && !IsSplatOrUndefs && !ScalarToTreeEntry.count(I) &&
+ return I && !IsSplatOrUndefs && !isVectorized(I) &&
!isVectorLikeInstWithConstOps(I) &&
!areAllUsersVectorized(I, UserIgnoreList) && isSimple(I);
};
@@ -13952,7 +13926,7 @@ Value *BoUpSLP::gather(
for (int I = 0, E = VL.size(); I < E; ++I) {
if (auto *Inst = dyn_cast<Instruction>(VL[I]))
if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
- getTreeEntry(Inst) ||
+ isVectorized(Inst) ||
(L && (!Root || L->isLoopInvariant(Root)) && L->contains(Inst))) &&
PostponedIndices.insert(I).second)
PostponedInsts.emplace_back(Inst, I);
@@ -13969,7 +13943,7 @@ Value *BoUpSLP::gather(
isa_and_nonnull<SExtInst, ZExtInst>(CI)) {
Value *Op = CI->getOperand(0);
if (auto *IOp = dyn_cast<Instruction>(Op);
- !IOp || !(isDeleted(IOp) || getTreeEntry(IOp)))
+ !IOp || !(isDeleted(IOp) || isVectorized(IOp)))
V = Op;
}
Scalar = Builder.CreateIntCast(
@@ -13995,7 +13969,7 @@ Value *BoUpSLP::gather(
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (isa<Instruction>(V)) {
- if (TreeEntry *Entry = getTreeEntry(V)) {
+ if (ArrayRef<TreeEntry *> Entries = getTreeEntries(V); !Entries.empty()) {
// Find which lane we need to extract.
User *UserOp = nullptr;
if (Scalar != V) {
@@ -14005,8 +13979,8 @@ Value *BoUpSLP::gather(
UserOp = InsElt;
}
if (UserOp) {
- unsigned FoundLane = Entry->findLaneForValue(V);
- ExternalUses.emplace_back(V, UserOp, FoundLane);
+ unsigned FoundLane = Entries.front()->findLaneForValue(V);
+ ExternalUses.emplace_back(V, UserOp, *Entries.front(), FoundLane);
}
}
}
@@ -14241,8 +14215,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
VecBase = EI->getVectorOperand();
- if (const TreeEntry *TE = R.getTreeEntry(VecBase))
- VecBase = TE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> TEs = R.getTreeEntries(VecBase); !TEs.empty())
+ VecBase = TEs.front()->VectorizedValue;
assert(VecBase && "Expected vectorized value.");
UniqueBases.insert(VecBase);
// If the only one use is vectorized - can delete the extractelement
@@ -14250,18 +14224,20 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
if (!EI->hasOneUse() || R.ExternalUsesAsOriginalScalar.contains(EI) ||
(NumParts != 1 && count(VL, EI) > 1) ||
any_of(EI->users(), [&](User *U) {
- const TreeEntry *UTE = R.getTreeEntry(U);
- return !UTE || R.MultiNodeScalars.contains(U) ||
+ ArrayRef<TreeEntry *> UTEs = R.getTreeEntries(U);
+ return UTEs.empty() || UTEs.size() > 1 ||
(isa<GetElementPtrInst>(U) &&
!R.areAllUsersVectorized(cast<Instruction>(U))) ||
- count_if(R.VectorizableTree,
- [&](const std::unique_ptr<TreeEntry> &TE) {
- return any_of(TE->UserTreeIndices,
- [&](const EdgeInfo &Edge) {
- return Edge.UserTE == UTE;
- }) &&
- is_contained(VL, EI);
- }) != 1;
+ (!UTEs.empty() &&
+ count_if(R.VectorizableTree,
+ [&](const std::unique_ptr<TreeEntry> &TE) {
+ return any_of(TE->UserTreeIndices,
+ [&](const EdgeInfo &Edge) {
+ return Edge.UserTE ==
+ UTEs.front();
+ }) &&
+ is_contained(VL, EI);
+ }) != 1);
}))
continue;
R.eraseInstruction(EI);
@@ -14296,8 +14272,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
return S;
Value *VecOp =
cast<ExtractElementInst>(std::get<0>(D))->getVectorOperand();
- if (const TreeEntry *TE = R.getTreeEntry(VecOp))
- VecOp = TE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> TEs = R.getTreeEntries(VecOp);
+ !TEs.empty())
+ VecOp = TEs.front()->VectorizedValue;
assert(VecOp && "Expected vectorized value.");
const unsigned Size =
cast<FixedVectorType>(VecOp->getType())->getNumElements();
@@ -14307,8 +14284,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
if (I == PoisonMaskElem)
continue;
Value *VecOp = cast<ExtractElementInst>(V)->getVectorOperand();
- if (const TreeEntry *TE = R.getTreeEntry(VecOp))
- VecOp = TE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> TEs = R.getTreeEntries(VecOp); !TEs.empty())
+ VecOp = TEs.front()->VectorizedValue;
assert(VecOp && "Expected vectorized value.");
VecOp = castToScalarTyElem(VecOp);
Bases[I / VF] = VecOp;
@@ -14634,29 +14611,20 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E,
if (!S)
return nullptr;
auto CheckSameVE = [&](const TreeEntry *VE) {
- return VE->isSame(VL) &&
- (any_of(VE->UserTreeIndices,
- [E, NodeIdx](const EdgeInfo &EI) {
- return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
- }) ||
- any_of(VectorizableTree,
- [E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isOperandGatherNode(
- {const_cast<TreeEntry *>(E), NodeIdx}) &&
- VE->isSame(TE->Scalars);
- }));
+ return any_of(VE->UserTreeIndices,
+ [E, NodeIdx](const EdgeInfo &EI) {
+ return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
+ }) ||
+ any_of(VectorizableTree,
+ [E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->isOperandGatherNode(
+ {const_cast<TreeEntry *>(E), NodeIdx}) &&
+ VE->isSame(TE->Scalars);
+ });
};
- TreeEntry *VE = getTreeEntry(S.getMainOp());
+ TreeEntry *VE = getSameValuesTreeEntry(S.getMainOp(), VL);
if (VE && CheckSameVE(VE))
return VE;
- auto It = MultiNodeScalars.find(S.getMainOp());
- if (It != MultiNodeScalars.end()) {
- auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
- return TE != VE && CheckSameVE(TE);
- });
- if (I != It->getSecond().end())
- return *I;
- }
return nullptr;
}
@@ -14874,9 +14842,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
for (auto [Idx, I] : enumerate(ExtractMask)) {
if (I == PoisonMaskElem)
continue;
- if (const auto *TE = getTreeEntry(
- cast<ExtractElementInst>(StoredGS[Idx])->getVectorOperand()))
- ExtractEntries.push_back(TE);
+ if (ArrayRef<TreeEntry *> TEs = getTreeEntries(
+ cast<ExtractElementInst>(StoredGS[Idx])->getVectorOperand());
+ !TEs.empty())
+ ExtractEntries.append(TEs.begin(), TEs.end());
}
if (std::optional<ResTy> Delayed =
ShuffleBuilder.needToDelay(E, ExtractEntries)) {
@@ -14907,10 +14876,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
any_of(E->Scalars, IsaPred<LoadInst>)) &&
any_of(E->Scalars,
[this](Value *V) {
- return isa<LoadInst>(V) && getTreeEntry(V);
+ return isa<LoadInst>(V) && isVectorized(V);
})) ||
(E->hasState() && E->isAltShuffle()) ||
- all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
+ all_of(E->Scalars, [this](Value *V) { return isVectorized(V); }) ||
isSplat(E->Scalars) ||
(E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) {
GatherShuffles =
@@ -15025,7 +14994,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
// non-poisonous, or by freezing the incoming scalar value first.
auto *It = find_if(Scalars, [this, E](Value *V) {
return !isa<UndefValue>(V) &&
- (getTreeEntry(V) || isGuaranteedNotToBePoison(V, AC) ||
+ (isVectorized(V) || isGuaranteedNotToBePoison(V, AC) ||
(E->UserTreeIndices.size() == 1 &&
any_of(V->uses(), [E](const Use &U) {
// Check if the value already used in the same operation in
@@ -15083,9 +15052,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
continue;
auto *EI = cast<ExtractElementInst>(StoredGS[I]);
Value *VecOp = EI->getVectorOperand();
- if (const auto *TE = getTreeEntry(VecOp))
- if (TE->VectorizedValue)
- VecOp = TE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> TEs = getTreeEntries(VecOp);
+ !TEs.empty() && TEs.front()->VectorizedValue)
+ VecOp = TEs.front()->VectorizedValue;
if (!Vec1) {
Vec1 = VecOp;
} else if (Vec1 != VecOp) {
@@ -15413,8 +15382,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
- if (const TreeEntry *TE = getTreeEntry(V))
- V = TE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> TEs = getTreeEntries(V); !TEs.empty())
+ V = TEs.front()->VectorizedValue;
setInsertPointAfterBundle(E);
V = FinalShuffle(V, E);
E->VectorizedValue = V;
@@ -16344,13 +16313,13 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
DenseMap<Value *, SmallVector<TreeEntry *>> PostponedValues;
for (const TreeEntry *E : PostponedNodes) {
auto *TE = const_cast<TreeEntry *>(E);
- if (auto *VecTE = getTreeEntry(TE->Scalars.front()))
- if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand(
- TE->UserTreeIndices.front().EdgeIdx)) &&
- VecTE->isSame(TE->Scalars))
- // Found gather node which is absolutely the same as one of the
- // vectorized nodes. It may happen after reordering.
- continue;
+ if (auto *VecTE = getSameValuesTreeEntry(
+ TE->Scalars.front(), TE->UserTreeIndices.front().UserTE->getOperand(
+ TE->UserTreeIndices.front().EdgeIdx));
+ VecTE && VecTE->isSame(TE->Scalars))
+ // Found gather node which is absolutely the same as one of the
+ // vectorized nodes. It may happen after reordering.
+ continue;
auto *PrevVec = cast<Instruction>(TE->VectorizedValue);
TE->VectorizedValue = nullptr;
auto *UserI =
@@ -16392,14 +16361,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
"Expected integer vector types only.");
std::optional<bool> IsSigned;
for (Value *V : TE->Scalars) {
- if (const TreeEntry *BaseTE = getTreeEntry(V)) {
- auto It = MinBWs.find(BaseTE);
- if (It != MinBWs.end()) {
- IsSigned = IsSigned.value_or(false) || It->second.second;
- if (*IsSigned)
- break;
- }
- for (const TreeEntry *MNTE : MultiNodeScalars.lookup(V)) {
+ if (isVectorized(V)) {
+ for (const TreeEntry *MNTE : getTreeEntries(V)) {
auto It = MinBWs.find(MNTE);
if (It != MinBWs.end()) {
IsSigned = IsSigned.value_or(false) || It->second.second;
@@ -16475,7 +16438,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
// has multiple uses of the same value.
if (User && !is_contained(Scalar->users(), User))
continue;
- TreeEntry *E = getTreeEntry(Scalar);
+ const TreeEntry *E = &ExternalUse.E;
assert(E && "Invalid scalar");
assert(!E->isGather() && "Extracting from a gather list");
// Non-instruction pointers are not deleted, just skip them.
@@ -16533,8 +16496,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
ES && isa<Instruction>(Vec)) {
Value *V = ES->getVectorOperand();
auto *IVec = cast<Instruction>(Vec);
- if (const TreeEntry *ETE = getTreeEntry(V))
- V = ETE->VectorizedValue;
+ if (ArrayRef<TreeEntry *> ETEs = getTreeEntries(V); !ETEs.empty())
+ V = ETEs.front()->VectorizedValue;
if (auto *IV = dyn_cast<Instruction>(V);
!IV || IV == Vec || IV->getParent() != IVec->getParent() ||
IV->comesBefore(IVec))
@@ -16587,27 +16550,31 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
if (!User) {
if (!ScalarsWithNullptrUser.insert(Scalar).second)
continue;
- assert((ExternallyUsedValues.count(Scalar) ||
- Scalar->hasNUsesOrMore(UsesLimit) ||
- ExternalUsesAsOriginalScalar.contains(Scalar) ||
- any_of(Scalar->users(),
- [&](llvm::User *U) {
- if (ExternalUsesAsOriginalScalar.contains(U))
- return true;
- TreeEntry *UseEntry = getTreeEntry(U);
- return UseEntry &&
- (UseEntry->State == TreeEntry::Vectorize ||
- UseEntry->State ==
- TreeEntry::StridedVectorize) &&
- (E->State == TreeEntry::Vectorize ||
- E->State == TreeEntry::StridedVectorize) &&
- doesInTreeUserNeedToExtract(
- Scalar, getRootEntryInstruction(*UseEntry),
- TLI, TTI);
- })) &&
- "Scalar with nullptr User must be registered in "
- "ExternallyUsedValues map or remain as scalar in vectorized "
- "instructions");
+ assert(
+ (ExternallyUsedValues.count(Scalar) ||
+ Scalar->hasNUsesOrMore(UsesLimit) ||
+ ExternalUsesAsOriginalScalar.contains(Scalar) ||
+ any_of(
+ Scalar->users(),
+ [&, TTI = TTI](llvm::User *U) {
+ if (ExternalUsesAsOriginalScalar.contains(U))
+ return true;
+ ArrayRef<TreeEntry *> UseEntries = getTreeEntries(U);
+ return !UseEntries.empty() &&
+ (E->State == TreeEntry::Vectorize ||
+ E->State == TreeEntry::StridedVectorize) &&
+ any_of(UseEntries, [&, TTI = TTI](TreeEntry *UseEntry) {
+ return (UseEntry->State == TreeEntry::Vectorize ||
+ UseEntry->State ==
+ TreeEntry::StridedVectorize) &&
+ doesInTreeUserNeedToExtract(
+ Scalar, getRootEntryInstruction(*UseEntry),
+ TLI, TTI);
+ });
+ })) &&
+ "Scalar with nullptr User must be registered in "
+ "ExternallyUsedValues map or remain as scalar in vectorized "
+ "instructions");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (auto *PHI = dyn_cast<PHINode>(VecI)) {
if (PHI->getParent()->isLandingPad())
@@ -16870,7 +16837,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
- assert((getTreeEntry(U) ||
+ assert((isVectorized(U) ||
(UserIgnoreList && UserIgnoreList->contains(U)) ||
(isa_and_nonnull<Instruction>(U) &&
isDeleted(cast<Instruction>(U)))) &&
@@ -16892,7 +16859,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
// Clear up reduction references, if any.
if (UserIgnoreList) {
for (Instruction *I : RemovedInsts) {
- const TreeEntry *IE = getTreeEntry(I);
+ const TreeEntry *IE = getTreeEntries(I).front();
if (IE->Idx != 0 &&
!(VectorizableTree.front()->isGather() &&
!IE->UserTreeIndices.empty() &&
@@ -17607,10 +17574,11 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
if (ScheduleData *SD = BS->getScheduleData(I)) {
- [[maybe_unused]] TreeEntry *SDTE = getTreeEntry(SD->Inst);
+ [[maybe_unused]] ArrayRef<TreeEntry *> SDTEs = getTreeEntries(SD->Inst);
assert((isVectorLikeInstWithConstOps(SD->Inst) ||
SD->isPartOfBundle() ==
- (SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) &&
+ (!SDTEs.empty() &&
+ !doesNotNeedToSchedule(SDTEs.front()->Scalars))) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
@@ -17772,7 +17740,7 @@ bool BoUpSLP::collectValuesToDemote(
auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
if (isa<PoisonValue>(V))
return true;
- if (MultiNodeScalars.contains(V))
+ if (getTreeEntries(V).size() > 1)
return false;
// For lat shuffle of sext/zext with many uses need to check the extra bit
// for unsigned values, otherwise may have incorrect casting for reused
@@ -17834,14 +17802,14 @@ bool BoUpSLP::collectValuesToDemote(
if (E.isGather() || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
return !isa<PoisonValue>(V) && all_of(V->users(), [&](User *U) {
- return isa<InsertElementInst>(U) && !getTreeEntry(U);
+ return isa<InsertElementInst>(U) && !isVectorized(U);
});
}))
return FinalAnalysis();
if (any_of(E.Scalars, [&](Value *V) {
return !all_of(V->users(), [=](User *U) {
- return getTreeEntry(U) ||
+ return isVectorized(U) ||
(E.Idx == 0 && UserIgnoreList &&
UserIgnoreList->contains(U)) ||
(!isa<CmpInst>(U) && U->getType()->isSized() &&
@@ -18192,9 +18160,9 @@ void BoUpSLP::computeMinimumValueSizes() {
return V->hasOneUse() || isa<Constant>(V) ||
(!V->hasNUsesOrMore(UsesLimit) &&
none_of(V->users(), [&](User *U) {
- const TreeEntry *TE = getTreeEntry(U);
+ ArrayRef<TreeEntry *> TEs = getTreeEntries(U);
const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE;
- if (TE == UserTE || !TE)
+ if (TEs.empty() || is_contained(TEs, UserTE))
return false;
if (!isa<CastInst, BinaryOperator, FreezeInst, PHINode,
SelectInst>(U) ||
@@ -18203,8 +18171,11 @@ void BoUpSLP::computeMinimumValueSizes() {
return true;
unsigned UserTESz = DL->getTypeSizeInBits(
UserTE->Scalars.front()->getType());
- auto It = MinBWs.find(TE);
- if (It != MinBWs.end() && It->second.first > UserTESz)
+ if (all_of(TEs, [&](const TreeEntry *TE) {
+ auto It = MinBWs.find(TE);
+ return It != MinBWs.end() &&
+ It->second.first > UserTESz;
+ }))
return true;
return DL->getTypeSizeInBits(U->getType()) > UserTESz;
}));