[PATCH] D80241: [SLP] PR45269 Fix getVectorElementSize() is slow
Dinar Temirbulatov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 19 14:19:01 PDT 2020
dtemirbulatov created this revision.
dtemirbulatov added reviewers: ABataev, RKSimon, spatel, anton-afanasyev, nikic.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.
The algorithm inside getVectorElementSize() has almost O(x^2) complexity: for example, when we compile MultiSource/Applications/ClamAV/shared_sha256.c, whose sha256_transform() function contains about 1k instructions, it results in almost 800k iterations. The following change uses a map to cache results, reducing the algorithm to linear complexity. I also added a check that each traversed instruction belongs to the same basic block as the root instruction, so that instructions from other basic blocks are not visited.
https://reviews.llvm.org/D80241
Files:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -629,7 +629,7 @@
/// the stored value. Otherwise, the size is the width of the largest loaded
/// value reaching V. This method is used by the vectorizer to calculate
/// vectorization factors.
- unsigned getVectorElementSize(Value *V) const;
+ unsigned getVectorElementSize(Value *V);
/// Compute the minimum type sizes required to represent the entries in a
/// vectorizable tree.
@@ -1715,6 +1715,9 @@
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
+ /// Maps an instruction to the proposed vectorizable size.
+ SmallDenseMap<Instruction *, unsigned> InstrElementSize;
+
/// A list of scalars that we found that we need to keep as scalars.
ValueSet MustGather;
@@ -4797,6 +4800,7 @@
}
Builder.ClearInsertionPoint();
+ InstrElementSize.clear();
return VectorizableTree[0]->VectorizedValue;
}
@@ -5333,7 +5337,7 @@
BS->ScheduleStart = nullptr;
}
-unsigned BoUpSLP::getVectorElementSize(Value *V) const {
+unsigned BoUpSLP::getVectorElementSize(Value *V) {
// If V is a store, just return the width of the stored value without
// traversing the expression tree. This is the common case.
if (auto *Store = dyn_cast<StoreInst>(V))
@@ -5343,11 +5347,17 @@
// that feed it. The type of the loaded value may indicate a more suitable
// width than V's type. We want to base the vector element size on the width
// of memory operations where possible.
+ auto *Inst = dyn_cast<Instruction>(V);
SmallVector<Instruction *, 16> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
- if (auto *I = dyn_cast<Instruction>(V)) {
- Worklist.push_back(I);
- Visited.insert(I);
+ BasicBlock *P = nullptr;
+ if (Inst) {
+ P = Inst->getParent();
+ auto E = InstrElementSize.find(Inst);
+ if (E != InstrElementSize.end())
+ return E->second;
+ Worklist.push_back(Inst);
+ Visited.insert(Inst);
}
// Traverse the expression tree in bottom-up order looking for loads. If we
@@ -5375,7 +5385,7 @@
isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I)) {
for (Use &U : I->operands())
if (auto *J = dyn_cast<Instruction>(U.get()))
- if (Visited.insert(J).second)
+ if (P == J->getParent() && Visited.insert(J).second)
Worklist.push_back(J);
}
@@ -5384,13 +5394,16 @@
FoundUnknownInst = true;
}
+ int Width = MaxWidth;
// If we didn't encounter a memory access in the expression tree, or if we
// gave up for some reason, just return the width of V.
if (!MaxWidth || FoundUnknownInst)
- return DL->getTypeSizeInBits(V->getType());
+ Width = DL->getTypeSizeInBits(V->getType());
+
+ for (Instruction *I : Visited)
+ InstrElementSize[I] = Width;
- // Otherwise, return the maximum width we found.
- return MaxWidth;
+ return Width;
}
// Determine if a value V in a vectorizable expression Expr can be demoted to a
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80241.265023.patch
Type: text/x-patch
Size: 3205 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200519/0731e33c/attachment.bin>
More information about the llvm-commits
mailing list