[llvm] [SLP][NFC]Unify ScalarToTreeEntries and MultiNodeScalars, NFC (PR #124914)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 04:16:21 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
Currently, SLP uses two distinct containers (ScalarToTreeEntry and
MultiNodeScalars) to map vectorized instructions to their corresponding
TreeEntry nodes. This makes lookups of the matching TreeEntries
inefficient and makes it harder to correctly track instructions that are
associated with multiple nodes.
There is a plan to extend this support to instructions that require
scheduling, in order to support copyable elements. Merging
ScalarToTreeEntry and MultiNodeScalars into a single map will reduce the
maintenance burden of that feature.
---
Patch is 48.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124914.diff
1 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+244-272)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2532edc5d86990..790c4dba0dc36b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1476,8 +1476,7 @@ class BoUpSLP {
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
VectorizableTree.clear();
- ScalarToTreeEntry.clear();
- MultiNodeScalars.clear();
+ ScalarToTreeEntries.clear();
MustGather.clear();
NonScheduledFirst.clear();
EntryToLastInstruction.clear();
@@ -1760,7 +1759,7 @@ class BoUpSLP {
auto AllUsersVectorized = [U1, U2, this](Value *V) {
return llvm::all_of(V->users(), [U1, U2, this](Value *U) {
- return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr;
+ return U == U1 || U == U2 || R.isVectorized(U);
});
};
return AllUsersVectorized(V1) && AllUsersVectorized(V2);
@@ -1776,9 +1775,13 @@ class BoUpSLP {
}
auto CheckSameEntryOrFail = [&]() {
- if (const TreeEntry *TE1 = R.getTreeEntry(V1);
- TE1 && TE1 == R.getTreeEntry(V2))
- return LookAheadHeuristics::ScoreSplatLoads;
+ if (ArrayRef<TreeEntry *> TEs1 = R.getTreeEntries(V1); !TEs1.empty()) {
+ SmallPtrSet<TreeEntry *, 4> Set(TEs1.begin(), TEs1.end());
+ if (ArrayRef<TreeEntry *> TEs2 = R.getTreeEntries(V2);
+ !TEs2.empty() &&
+ any_of(TEs2, [&](TreeEntry *E) { return Set.contains(E); }))
+ return LookAheadHeuristics::ScoreSplatLoads;
+ }
return LookAheadHeuristics::ScoreFail;
};
@@ -2851,13 +2854,7 @@ class BoUpSLP {
continue;
auto *I = cast<Instruction>(V);
salvageDebugInfo(*I);
- SmallVector<const TreeEntry *> Entries;
- if (const TreeEntry *Entry = getTreeEntry(I)) {
- Entries.push_back(Entry);
- auto It = MultiNodeScalars.find(I);
- if (It != MultiNodeScalars.end())
- Entries.append(It->second.begin(), It->second.end());
- }
+ ArrayRef<TreeEntry *> Entries = getTreeEntries(I);
for (Use &U : I->operands()) {
if (auto *OpI = dyn_cast_if_present<Instruction>(U.get());
OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() &&
@@ -2961,7 +2958,11 @@ class BoUpSLP {
}
/// Check if the value is vectorized in the tree.
- bool isVectorized(Value *V) const { return getTreeEntry(V); }
+ bool isVectorized(Value *V) const {
+ assert(V && "V cannot be nullptr.");
+ return ScalarToTreeEntries.contains(V);
+ }
+
~BoUpSLP();
@@ -2999,16 +3000,10 @@ class BoUpSLP {
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
TreeEntry *TE = nullptr;
const auto *It = find_if(VL, [&](Value *V) {
- TE = getTreeEntry(V);
- if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
- return true;
- auto It = MultiNodeScalars.find(V);
- if (It != MultiNodeScalars.end()) {
- for (TreeEntry *E : It->second) {
- if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
- TE = E;
- return true;
- }
+ for (TreeEntry *E : getTreeEntries(V)) {
+ if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
+ TE = E;
+ return true;
}
}
return false;
@@ -3659,18 +3654,24 @@ class BoUpSLP {
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
}
if (!Last->isGather()) {
+ SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
- const TreeEntry *TE = getTreeEntry(V);
- assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) &&
- "Scalar already in tree!");
- if (TE) {
- if (TE != Last)
- MultiNodeScalars.try_emplace(V).first->getSecond().push_back(Last);
- continue;
+ auto It = ScalarToTreeEntries.find(V);
+ assert(
+ (It == ScalarToTreeEntries.end() ||
+ (It->getSecond().size() == 1 && It->getSecond().front() == Last) ||
+ doesNotNeedToBeScheduled(V)) &&
+ "Scalar already in tree!");
+ if (It == ScalarToTreeEntries.end()) {
+ ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
+ (void)Processed.insert(V);
+ } else if (Processed.insert(V).second) {
+ assert(!is_contained(It->getSecond(), Last) &&
+ "Value already associated with the node.");
+ It->getSecond().push_back(Last);
}
- ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
ScheduleData *BundleMember = *Bundle;
@@ -3725,14 +3726,23 @@ class BoUpSLP {
}
#endif
- TreeEntry *getTreeEntry(Value *V) {
+ /// Get list of vector entries, associated with the value \p V.
+ ArrayRef<TreeEntry *> getTreeEntries(Value *V) const {
assert(V && "V cannot be nullptr.");
- return ScalarToTreeEntry.lookup(V);
+ auto It = ScalarToTreeEntries.find(V);
+ if (It == ScalarToTreeEntries.end())
+ return {};
+ return It->getSecond();
}
- const TreeEntry *getTreeEntry(Value *V) const {
+ /// Returns first vector node for value \p V, matching values \p VL.
+ TreeEntry *getSameValuesTreeEntry(Value *V, ArrayRef<Value *> VL,
+ bool SameVF = false) const {
assert(V && "V cannot be nullptr.");
- return ScalarToTreeEntry.lookup(V);
+ for (TreeEntry *TE : ScalarToTreeEntries.lookup(V))
+ if ((!SameVF || TE->getVectorFactor() == VL.size()) && TE->isSame(VL))
+ return TE;
+ return nullptr;
}
/// Check that the operand node of alternate node does not generate
@@ -3752,12 +3762,8 @@ class BoUpSLP {
OrdersType &CurrentOrder,
SmallVectorImpl<Value *> &PointerOps);
- /// Maps a specific scalar to its tree entry.
- SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
-
- /// List of scalars, used in several vectorize nodes, and the list of the
- /// nodes.
- SmallDenseMap<Value *, SmallVector<TreeEntry *>> MultiNodeScalars;
+ /// Maps a specific scalar to its tree entry(ies).
+ SmallDenseMap<Value *, SmallVector<TreeEntry *>> ScalarToTreeEntries;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
@@ -3798,16 +3804,19 @@ class BoUpSLP {
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
- ExternalUser(Value *S, llvm::User *U, int L)
- : Scalar(S), User(U), Lane(L) {}
+ ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L)
+ : Scalar(S), User(U), E(E), Lane(L) {}
+
+ /// Which scalar in our function.
+ Value *Scalar = nullptr;
- // Which scalar in our function.
- Value *Scalar;
+ /// Which user that uses the scalar.
+ llvm::User *User = nullptr;
- // Which user that uses the scalar.
- llvm::User *User;
+ /// Vector node, the value is part of.
+ const TreeEntry &E;
- // Which lane does the scalar belong to.
+ /// Which lane does the scalar belong to.
int Lane;
};
using UserList = SmallVector<ExternalUser, 16>;
@@ -5113,7 +5122,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
auto IsAnyPointerUsedOutGraph =
IsPossibleStrided && any_of(PointerOps, [&](Value *V) {
return isa<Instruction>(V) && any_of(V->users(), [&](User *U) {
- return !getTreeEntry(U) && !MustGather.contains(U);
+ return !isVectorized(U) && !MustGather.contains(U);
});
});
const unsigned AbsoluteDiff = std::abs(*Diff);
@@ -6572,7 +6581,7 @@ void BoUpSLP::buildExternalUses(
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
<< FoundLane << " from " << *Scalar << ".\n");
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
- ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
+ ExternalUses.emplace_back(Scalar, nullptr, *Entry, FoundLane);
continue;
}
for (User *U : Scalar->users()) {
@@ -6587,16 +6596,24 @@ void BoUpSLP::buildExternalUses(
continue;
// Skip in-tree scalars that become vectors
- if (TreeEntry *UseEntry = getTreeEntry(U)) {
+ if (ArrayRef<TreeEntry *> UseEntries = getTreeEntries(U);
+ !UseEntries.empty()) {
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in FoundLane will
// be used.
- if (UseEntry->State == TreeEntry::ScatterVectorize ||
- !doesInTreeUserNeedToExtract(
- Scalar, getRootEntryInstruction(*UseEntry), TLI, TTI)) {
+ if (any_of(UseEntries, [&](TreeEntry *UseEntry) {
+ return UseEntry->State == TreeEntry::ScatterVectorize ||
+ !doesInTreeUserNeedToExtract(
+ Scalar, getRootEntryInstruction(*UseEntry), TLI,
+ TTI);
+ })) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
- assert(!UseEntry->isGather() && "Bad state");
+ assert(none_of(UseEntries,
+ [](TreeEntry *UseEntry) {
+ return UseEntry->isGather();
+ }) &&
+ "Bad state");
continue;
}
U = nullptr;
@@ -6613,7 +6630,7 @@ void BoUpSLP::buildExternalUses(
<< " from lane " << FoundLane << " from " << *Scalar
<< ".\n");
It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first;
- ExternalUses.emplace_back(Scalar, U, FoundLane);
+ ExternalUses.emplace_back(Scalar, U, *Entry, FoundLane);
if (!U)
break;
}
@@ -6644,7 +6661,7 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const {
!isValidElementType(SI->getValueOperand()->getType()))
continue;
// Skip entry if already
- if (getTreeEntry(U))
+ if (isVectorized(U))
continue;
Value *Ptr =
@@ -7027,10 +7044,11 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (User *U : LI->users()) {
if (auto *UI = dyn_cast<Instruction>(U); UI && isDeleted(UI))
continue;
- if (const TreeEntry *UTE = getTreeEntry(U)) {
+ for (const TreeEntry *UTE : getTreeEntries(U)) {
for (int I : seq<int>(UTE->getNumOperands())) {
- if (all_of(UTE->getOperand(I),
- [LI](Value *V) { return V == LI; }))
+ if (all_of(UTE->getOperand(I), [LI](Value *V) {
+ return V == LI || isa<PoisonValue>(V);
+ }))
// Found legal broadcast - do not vectorize.
return false;
}
@@ -7135,7 +7153,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
int LastDist = LocalLoadsDists.front().second;
bool AllowMaskedGather = IsMaskedGatherSupported(OriginalLoads);
for (const std::pair<LoadInst *, int> &L : LocalLoadsDists) {
- if (getTreeEntry(L.first))
+ if (isVectorized(L.first))
continue;
assert(LastDist >= L.second &&
"Expected first distance always not less than second");
@@ -7187,9 +7205,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (auto [Slice, _] : Results) {
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads ("
<< Slice.size() << ")\n");
- if (any_of(Slice, [&](Value *V) { return getTreeEntry(V); })) {
+ if (any_of(Slice, [&](Value *V) { return isVectorized(V); })) {
for (Value *L : Slice)
- if (!getTreeEntry(L))
+ if (!isVectorized(L))
SortedNonVectorized.push_back(cast<LoadInst>(L));
continue;
}
@@ -7228,7 +7246,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
any_of(E->Scalars, [&, Slice = Slice](Value *V) {
if (isa<Constant>(V))
return false;
- if (getTreeEntry(V))
+ if (isVectorized(V))
return true;
const auto &Nodes = ValueToGatherNodes.at(V);
return (Nodes.size() != 1 || !Nodes.contains(E)) &&
@@ -7315,7 +7333,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
for (unsigned I = 0, E = Slice.size(); I < E; I += VF) {
ArrayRef<Value *> SubSlice =
Slice.slice(I, std::min(VF, E - I));
- if (getTreeEntry(SubSlice.front()))
+ if (isVectorized(SubSlice.front()))
continue;
// Check if the subslice is to be-vectorized entry, which is not
// equal to entry.
@@ -7585,7 +7603,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
DenseMap<Value *, unsigned> Uniques;
for (Value *V : Op) {
if (isa<Constant, ExtractElementInst>(V) ||
- getTreeEntry(V) || (L && L->isLoopInvariant(V))) {
+ isVectorized(V) || (L && L->isLoopInvariant(V))) {
if (isa<UndefValue>(V))
++UndefCnt;
continue;
@@ -7603,7 +7621,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
return none_of(Uniques, [&](const auto &P) {
return P.first->hasNUsesOrMore(P.second + 1) &&
none_of(P.first->users(), [&](User *U) {
- return getTreeEntry(U) || Uniques.contains(U);
+ return isVectorized(U) || Uniques.contains(U);
});
});
}) ||
@@ -8167,59 +8185,25 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check if this is a duplicate of another entry.
if (S) {
- if (TreeEntry *E = getTreeEntry(S.getMainOp())) {
- LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp()
- << ".\n");
- if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) {
- auto It = MultiNodeScalars.find(S.getMainOp());
- if (It != MultiNodeScalars.end()) {
- auto *TEIt = find_if(It->getSecond(),
- [&](TreeEntry *ME) { return ME->isSame(VL); });
- if (TEIt != It->getSecond().end())
- E = *TEIt;
- else
- E = nullptr;
- } else {
- E = nullptr;
- }
- }
- if (!E) {
- if (!doesNotNeedToBeScheduled(S.getMainOp())) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
- SmallPtrSet<const TreeEntry *, 4> Nodes;
- Nodes.insert(getTreeEntry(S.getMainOp()));
- for (const TreeEntry *E : MultiNodeScalars.lookup(S.getMainOp()))
- Nodes.insert(E);
- SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
- if (any_of(Nodes, [&](const TreeEntry *E) {
- if (all_of(E->Scalars,
- [&](Value *V) { return Values.contains(V); }))
- return true;
- SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
- E->Scalars.end());
- return (
- all_of(VL, [&](Value *V) { return EValues.contains(V); }));
- })) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
- return;
- }
- } else {
- // Record the reuse of the tree node. FIXME, currently this is only
- // used to properly draw the graph rather than for the actual
- // vectorization.
+ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n");
+ for (TreeEntry *E : getTreeEntries(S.getMainOp())) {
+ if (E->isSame(VL)) {
+ // Record the reuse of the tree node.
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
<< ".\n");
return;
}
+ SmallPtrSet<Value *, 8> Values(E->Scalars.begin(), E->Scalars.end());
+ if (all_of(VL, [&](Value *V) {
+ return isa<PoisonValue>(V) || Values.contains(V);
+ })) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ return;
+ }
}
}
@@ -8371,7 +8355,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
doesNotNeedToBeScheduled(V))
continue;
- if (getTreeEntry(V)) {
+ if (isVectorized(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
if (TryToFindDuplicates(S))
@@ -9029,8 +9013,7 @@ bool BoUpSLP::areAllUsersVectorized(
Instruction *I, const SmallDenseSet<Value *> *VectorizedVals) const {
return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) ||
all_of(I->users(), [this](User *U) {
- return ScalarToTreeEntry.contains(U) ||
- isVectorLikeInstWithConstOps(U) ||
+ return isVectorized(U) || isVectorLikeInstWithConstOps(U) ||
(isa<ExtractElementInst>(U) && MustGather.contains(U));
});
}
@@ -9844,13 +9827,9 @@ void BoUpSLP::transformNodes() {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
// If any instruction is vectorized already - do not try again.
// Reuse the existing node, if it fully matches the slice.
- if (const TreeEntry *SE = getTreeEntry(Slice.front());
- SE || getTreeEntry(Slice.back())) {
- if (!SE)
- continue;
- if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
- continue;
- }
+ if (isVectorized(Slice.front()) &&
+ !getSameValuesTreeEntry(Slice.front(), Slice, /*SameVF=*/true))
+ continue;
// Constant already handled effectively - skip.
if (allConstant(Slice))
continue;
@@ -9933,12 +9912,8 @@ void BoUpSLP::transformNodes() {
for (auto [Cnt, Sz] : Slices) {
ArrayRef<Value *> Slice = VL.slice(Cnt, Sz);
// If any instruction is vectorized already - do not try again.
- if (TreeEntry *SE = getTreeEntry(Slice.front());
- SE || getTreeEntry(Slice.back())) {
- if (!SE)
- continue;
- if (VF != SE->getVectorFactor() || !SE->isSame(Slice))
- continue;
+ if (TreeEntry *SE = getSameValuesTreeEntry(Slice.front(), Slice,
+ /*SameVF=*/true)) {
SE->UserTreeIndices.emplace_back(&E, UINT_MAX);
AddCombinedNode(SE->Idx, Cnt, Sz);
continue;
@@ -10724,7 +10699,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
auto *EE = cast<Ext...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124914
More information about the llvm-commits
mailing list