[llvm] 319a722 - [SLP][NFC]Improve compile time, NFC.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon May 23 12:19:08 PDT 2022
Author: Alexey Bataev
Date: 2022-05-23T12:15:27-07:00
New Revision: 319a722f6fca365c8f71f457eac60bc3909988ee
URL: https://github.com/llvm/llvm-project/commit/319a722f6fca365c8f71f457eac60bc3909988ee
DIFF: https://github.com/llvm/llvm-project/commit/319a722f6fca365c8f71f457eac60bc3909988ee.diff
LOG: [SLP][NFC]Improve compile time, NFC.
Builds the UserIgnore list only once, as a SmallDenseSet, without rebuilding
it between runs; iterates over the gathered values instead of the list of
reduction ops; and performs some checks in buildTree_rec only if the
corresponding containers are not empty.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4df7aaca7362..beb54dc7822c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -895,7 +895,10 @@ class BoUpSLP {
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst = None);
+ const SmallDenseSet<Value *> &UserIgnoreLst);
+
+ /// Construct a vectorizable tree that starts at \p Roots.
+ void buildTree(ArrayRef<Value *> Roots);
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
@@ -916,6 +919,7 @@ class BoUpSLP {
}
MinBWs.clear();
InstrElementSize.clear();
+ UserIgnoreList = nullptr;
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
@@ -2073,8 +2077,8 @@ class BoUpSLP {
AnalyzedReductionVals.clear();
}
/// Checks if the given value is gathered in one of the nodes.
- bool isGathered(Value *V) const {
- return MustGather.contains(V);
+ bool isAnyGathered(const SmallDenseSet<Value *> &Vals) const {
+ return any_of(MustGather, [&](Value *V) { return Vals.contains(V); });
}
~BoUpSLP();
@@ -3192,7 +3196,7 @@ class BoUpSLP {
void scheduleBlock(BlockScheduling *BS);
/// List of users to ignore during scheduling and that don't need extracting.
- SmallPtrSet<Value *, 4> UserIgnoreList;
+ const SmallDenseSet<Value *> *UserIgnoreList = nullptr;
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
/// sorted SmallVectors of unsigned.
@@ -4119,7 +4123,7 @@ void BoUpSLP::buildExternalUses(
}
// Ignore users in the user ignore list.
- if (UserIgnoreList.contains(UserInst))
+ if (UserIgnoreList && UserIgnoreList->contains(UserInst))
continue;
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
@@ -4276,10 +4280,16 @@ BoUpSLP::findExternalStoreUsersReorderIndices(TreeEntry *TE) const {
}
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst) {
+ const SmallDenseSet<Value *> &UserIgnoreLst) {
+ deleteTree();
+ UserIgnoreList = &UserIgnoreLst;
+ if (!allSameType(Roots))
+ return;
+ buildTree_rec(Roots, 0, EdgeInfo());
+}
+
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
deleteTree();
- UserIgnoreList.clear();
- UserIgnoreList.insert(UserIgnoreLst.begin(), UserIgnoreLst.end());
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0, EdgeInfo());
@@ -4595,12 +4605,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// the same block.
// Don't vectorize ephemeral values.
- for (Value *V : VL) {
- if (EphValues.count(V)) {
- LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
- << ") is ephemeral.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
- return;
+ if (!EphValues.empty()) {
+ for (Value *V : VL) {
+ if (EphValues.count(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
+ << ") is ephemeral.\n");
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ return;
+ }
}
}
@@ -4638,13 +4650,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
- for (Value *V : VL) {
- if (UserIgnoreList.contains(V)) {
- LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- if (TryToFindDuplicates(S))
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
+ if (UserIgnoreList && !UserIgnoreList->empty()) {
+ for (Value *V : VL) {
+ if (UserIgnoreList && UserIgnoreList->contains(V)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ return;
+ }
}
}
@@ -8570,7 +8584,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
- assert((getTreeEntry(U) || UserIgnoreList.contains(U) ||
+ assert((getTreeEntry(U) ||
+ (UserIgnoreList && UserIgnoreList->contains(U)) ||
(isa_and_nonnull<Instruction>(U) &&
isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
@@ -10689,6 +10704,8 @@ class HorizontalReduction {
/// Attempt to vectorize the tree found by matchAssociativeReduction.
Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
constexpr int ReductionLimit = 4;
+ constexpr unsigned RegMaxNumber = 4;
+ constexpr unsigned RedValsMaxNumber = 128;
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
@@ -10726,12 +10743,12 @@ class HorizontalReduction {
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
- SmallVector<Value *> IgnoreList;
+ SmallDenseSet<Value *> IgnoreList;
for (ReductionOpsType &RdxOps : ReductionOps)
for (Value *RdxOp : RdxOps) {
if (!RdxOp)
continue;
- IgnoreList.push_back(RdxOp);
+ IgnoreList.insert(RdxOp);
}
bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
@@ -10793,7 +10810,12 @@ class HorizontalReduction {
if (NumReducedVals < ReductionLimit)
continue;
- unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
+ unsigned MaxVecRegSize = V.getMaxVecRegSize();
+ unsigned EltSize = V.getVectorElementSize(Candidates[0]);
+ unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize);
+
+ unsigned ReduxWidth = std::min<unsigned>(
+ PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts vectorization attempt with lower vector factor.
@@ -10803,10 +10825,7 @@ class HorizontalReduction {
&CheckForReusedReductionOpsLocal,
&PrevReduxWidth, &V,
&IgnoreList](bool IgnoreVL = false) {
- bool IsAnyRedOpGathered =
- !IgnoreVL && any_of(IgnoreList, [&V](Value *RedOp) {
- return V.isGathered(RedOp);
- });
+ bool IsAnyRedOpGathered = !IgnoreVL && V.isAnyGathered(IgnoreList);
if (!CheckForReusedReductionOpsLocal && PrevReduxWidth == ReduxWidth) {
// Check if any of the reduction ops are gathered. If so, worth
// trying again with less number of reduction ops.
@@ -10871,13 +10890,37 @@ class HorizontalReduction {
LocalExternallyUsedValues[TrackedVals[V]];
});
}
- for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
- if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
+ // Number of uses of the candidates in the vector of values.
+ SmallDenseMap<Value *, unsigned> NumUses;
+ for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (NumUses.count(V) > 0)
+ continue;
+ NumUses[V] = std::count(VL.begin(), VL.end(), V);
+ }
+ for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (NumUses.count(V) > 0)
+ continue;
+ NumUses[V] = std::count(VL.begin(), VL.end(), V);
+ }
+ // Gather externally used values.
+ SmallPtrSet<Value *, 4> Visited;
+ for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (!Visited.insert(V).second)
+ continue;
+ unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V];
+ if (NumOps != ReducedValsToOps.find(V)->second.size())
+ LocalExternallyUsedValues[V];
+ }
+ for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
+ Value *V = Candidates[Cnt];
+ if (!Visited.insert(V).second)
continue;
- unsigned NumOps = VectorizedVals.lookup(Candidates[Cnt]) +
- std::count(VL.begin(), VL.end(), Candidates[Cnt]);
- if (NumOps != ReducedValsToOps.find(Candidates[Cnt])->second.size())
- LocalExternallyUsedValues[Candidates[Cnt]];
+ unsigned NumOps = VectorizedVals.lookup(V) + NumUses[V];
+ if (NumOps != ReducedValsToOps.find(V)->second.size())
+ LocalExternallyUsedValues[V];
}
V.buildExternalUses(LocalExternallyUsedValues);
More information about the llvm-commits mailing list