[llvm] b7a4ace - [SLP][NFC]Improve compile time by size analysis limit and reduction size
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 14:46:21 PDT 2024
Author: Alexey Bataev
Date: 2024-03-27T14:46:04-07:00
New Revision: b7a4ace72edf79f8250df2b08f0c14177d346770
URL: https://github.com/llvm/llvm-project/commit/b7a4ace72edf79f8250df2b08f0c14177d346770
DIFF: https://github.com/llvm/llvm-project/commit/b7a4ace72edf79f8250df2b08f0c14177d346770.diff
LOG: [SLP][NFC]Improve compile time by size analysis limit and reduction size limit.
Use RecursionMaxDepth to limit the number of lookups in BoUpSLP::getVectorElementSize, and limit the reduction width for bool reduced values.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7f528848002b5b..961380ce4ad9f2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13928,26 +13928,29 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// that feed it. The type of the loaded value may indicate a more suitable
// width than V's type. We want to base the vector element size on the width
// of memory operations where possible.
- SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
+ SmallVector<std::tuple<Instruction *, BasicBlock *, unsigned>> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
if (auto *I = dyn_cast<Instruction>(V)) {
- Worklist.emplace_back(I, I->getParent());
+ Worklist.emplace_back(I, I->getParent(), 0);
Visited.insert(I);
}
// Traverse the expression tree in bottom-up order looking for loads. If we
// encounter an instruction we don't yet handle, we give up.
auto Width = 0u;
+ Value *FirstNonBool = nullptr;
while (!Worklist.empty()) {
- Instruction *I;
- BasicBlock *Parent;
- std::tie(I, Parent) = Worklist.pop_back_val();
+ auto [I, Parent, Level] = Worklist.pop_back_val();
// We should only be looking at scalar instructions here. If the current
// instruction has a vector type, skip.
auto *Ty = I->getType();
if (isa<VectorType>(Ty))
continue;
+ if (Ty != Builder.getInt1Ty() && !FirstNonBool)
+ FirstNonBool = I;
+ if (Level > RecursionMaxDepth)
+ continue;
// If the current instruction is a load, update MaxWidth to reflect the
// width of the loaded value.
@@ -13960,11 +13963,16 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// user or the use is a PHI node, we add it to the worklist.
else if (isa<PHINode, CastInst, GetElementPtrInst, CmpInst, SelectInst,
BinaryOperator, UnaryOperator>(I)) {
- for (Use &U : I->operands())
+ for (Use &U : I->operands()) {
if (auto *J = dyn_cast<Instruction>(U.get()))
if (Visited.insert(J).second &&
- (isa<PHINode>(I) || J->getParent() == Parent))
- Worklist.emplace_back(J, J->getParent());
+ (isa<PHINode>(I) || J->getParent() == Parent)) {
+ Worklist.emplace_back(J, J->getParent(), Level + 1);
+ continue;
+ }
+ if (!FirstNonBool && U.get()->getType() != Builder.getInt1Ty())
+ FirstNonBool = U.get();
+ }
} else {
break;
}
@@ -13974,8 +13982,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
// gave up for some reason, just return the width of V. Otherwise, return the
// maximum width we found.
if (!Width) {
- if (auto *CI = dyn_cast<CmpInst>(V))
- V = CI->getOperand(0);
+ if (V->getType() == Builder.getInt1Ty() && FirstNonBool)
+ V = FirstNonBool;
Width = DL->getTypeSizeInBits(V->getType());
}
@@ -15838,7 +15846,9 @@ class HorizontalReduction {
RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned ReduxWidth = std::min<unsigned>(
- llvm::bit_floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
+ llvm::bit_floor(NumReducedVals),
+ std::clamp<unsigned>(MaxElts, RedValsMaxNumber,
+ RegMaxNumber * RedValsMaxNumber));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts vectorization attempt with lower vector factor.
More information about the llvm-commits
mailing list