[llvm] 51aac5b - [SLP][NFCI]Improve compile time for phis with large number of incoming values.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 30 14:43:06 PDT 2024
Author: Alexey Bataev
Date: 2024-04-30T14:42:49-07:00
New Revision: 51aac5b04362daf99124d20ea93837b86b833f90
URL: https://github.com/llvm/llvm-project/commit/51aac5b04362daf99124d20ea93837b86b833f90
DIFF: https://github.com/llvm/llvm-project/commit/51aac5b04362daf99124d20ea93837b86b833f90.diff
LOG: [SLP][NFCI]Improve compile time for phis with large number of incoming values.
Added a limit of at most 128 incoming values for PHI nodes to be
vectorized, and improved performance by using logarithmic search instead
of linear search if the number of incoming values is > 4.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3a1b0d39a4dcc..681081de13e011 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -216,6 +216,9 @@ static const unsigned MaxMemDepDistance = 160;
/// regions to be handled.
static const int MinScheduleRegionSize = 16;
+/// Maximum allowed number of operands in the PHI nodes.
+static const unsigned MaxPHINumOperands = 128;
+
/// Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
@@ -6001,6 +6004,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
auto *VL0 = cast<Instruction>(S.OpValue);
switch (ShuffleOrOp) {
case Instruction::PHI: {
+ // Too many operands - gather, most probably won't be vectorized.
+ if (VL0->getNumOperands() > MaxPHINumOperands)
+ return TreeEntry::NeedToGather;
// Check for terminator values (e.g. invoke).
for (Value *V : VL)
for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
@@ -6307,6 +6313,85 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
}
}
+namespace {
+/// Collects the operands of a group of phi nodes, ordered by the \p Main
+/// PHINode order of incoming basic blocks/values (one operand list per incoming value of Main).
+class PHIHandler {
+ DominatorTree &DT; // Queried only to check whether an incoming block is reachable from entry.
+ PHINode *Main = nullptr; // PHI whose incoming-block order defines the order of the operand lists.
+ SmallVector<Value *> Phis; // The phi nodes being handled; each one is cast<PHINode> in buildOperands().
+ SmallVector<SmallVector<Value *>> Operands; // Operands[I][Idx]: value for incoming index I of Main and phi number Idx.
+
+public:
+ PHIHandler() = delete;
+ PHIHandler(DominatorTree &DT, PHINode *Main, ArrayRef<Value *> Phis)
+ : DT(DT), Main(Main), Phis(Phis),
+ Operands(Main->getNumIncomingValues(),
+ SmallVector<Value *>(Phis.size(), nullptr)) {} // Every slot starts as nullptr until buildOperands() fills it.
+ void buildOperands() { // Fills Operands: direct per-block lookup for small phis, a block->index map otherwise.
+ constexpr unsigned FastLimit = 4; // At or below this many incoming values the direct lookup path is used.
+ if (Main->getNumIncomingValues() <= FastLimit) {
+ for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
+ BasicBlock *InBB = Main->getIncomingBlock(I);
+ if (!DT.isReachableFromEntry(InBB)) { // Unreachable incoming block: use poison for every phi.
+ Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
+ continue;
+ }
+ // Prepare the operand vector for incoming index I.
+ for (auto [Idx, V] : enumerate(Phis)) {
+ auto *P = cast<PHINode>(V);
+ if (P->getIncomingBlock(I) == InBB) // Same block at the same position as in Main: read by index.
+ Operands[I][Idx] = P->getIncomingValue(I);
+ else // Different incoming order: look the value up by block.
+ Operands[I][Idx] = P->getIncomingValueForBlock(InBB);
+ }
+ }
+ return;
+ }
+ SmallDenseMap<BasicBlock *, SmallVector<unsigned>, 4> Blocks; // Reachable incoming block of Main -> incoming indices of Main that use it.
+ for (unsigned I : seq<unsigned>(0, Main->getNumIncomingValues())) {
+ BasicBlock *InBB = Main->getIncomingBlock(I);
+ if (!DT.isReachableFromEntry(InBB)) { // Unreachable incoming block: poison for every phi, not recorded in Blocks.
+ Operands[I].assign(Phis.size(), PoisonValue::get(Main->getType()));
+ continue;
+ }
+ Blocks.try_emplace(InBB).first->second.push_back(I); // The same block may be recorded for several incoming indices.
+ }
+ for (auto [Idx, V] : enumerate(Phis)) {
+ auto *P = cast<PHINode>(V);
+ for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
+ BasicBlock *InBB = P->getIncomingBlock(I);
+ if (InBB == Main->getIncomingBlock(I)) { // Same block at the same position as in Main.
+ if (isa_and_nonnull<PoisonValue>(Operands[I][Idx])) // Slot already holds poison (e.g. the unreachable-block fill above); keep it.
+ continue;
+ Operands[I][Idx] = P->getIncomingValue(I);
+ continue;
+ }
+ auto It = Blocks.find(InBB);
+ if (It == Blocks.end()) // InBB is not a reachable incoming block of Main; nothing to store.
+ continue;
+ Operands[It->second.front()][Idx] = P->getIncomingValue(I); // Store under the first Main index recorded for this block.
+ }
+ }
+ for (const auto &P : Blocks) { // Copy operand lists to the remaining indices of blocks recorded more than once.
+ if (P.getSecond().size() <= 1)
+ continue;
+ unsigned BasicI = P.getSecond().front();
+ for (unsigned I : ArrayRef(P.getSecond()).drop_front()) {
+ assert(all_of(enumerate(Operands[I]),
+ [&](const auto &Data) {
+ return !Data.value() ||
+ Data.value() == Operands[BasicI][Data.index()];
+ }) &&
+ "Expected empty operands list.");
+ Operands[I] = Operands[BasicI]; // Later indices of the same block get the first index's operand list.
+ }
+ }
+ }
+ ArrayRef<Value *> getOperands(unsigned I) const { return Operands[I]; } // Operand list for incoming index I of Main.
+};
+} // namespace
+
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -6675,24 +6760,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
// Keeps the reordered operands to avoid code duplication.
- SmallVector<ValueList, 2> OperandsVec;
- for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
- if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
- ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
- TE->setOperand(I, Operands);
- OperandsVec.push_back(Operands);
- continue;
- }
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL)
- Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
- PH->getIncomingBlock(I)));
- TE->setOperand(I, Operands);
- OperandsVec.push_back(Operands);
- }
- for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
- buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
+ PHIHandler Handler(*DT, PH, VL);
+ Handler.buildOperands();
+ for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
+ TE->setOperand(I, Handler.getOperands(I));
+ for (unsigned I : seq<unsigned>(0, PH->getNumOperands()))
+ buildTree_rec(Handler.getOperands(I), Depth + 1, {TE, I});
return;
}
case Instruction::ExtractValue:
@@ -18166,8 +18239,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Collect the incoming values from the PHIs.
Incoming.clear();
for (Instruction &I : *BB) {
- PHINode *P = dyn_cast<PHINode>(&I);
- if (!P)
+ auto *P = dyn_cast<PHINode>(&I);
+ if (!P || P->getNumIncomingValues() > MaxPHINumOperands)
break;
// No need to analyze deleted, vectorized and non-vectorizable
More information about the llvm-commits
mailing list