[llvm] [SLP] Reject 2-element vectorization when vector inst count exceeds scalar (PR #190414)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 23:30:21 PDT 2026
================
@@ -12817,6 +12829,112 @@ bool BoUpSLP::areAllUsersVectorized(
});
}
+unsigned BoUpSLP::getNumScalarInsts() const {
+ unsigned Count = 0;
+ for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+ const TreeEntry &TE = *Ptr;
+ if (DeletedNodes.contains(&TE))
+ continue;
+ if (TE.isGather() || TransformedToGatherNodes.contains(&TE)) {
+ // Count instruction scalars in gathers — they exist in the scalar
+ // code regardless of vectorization. ExtractElement instructions
+ // become free when the vector input is used directly.
+ for (Value *V : TE.Scalars)
+ if (isa<Instruction>(V))
+ ++Count;
+ continue;
+ }
+ // Each vectorize entry represents a bundle of scalar instructions.
+ // Count per-entry without cross-entry deduplication, since shared
+ // scalars across entries still represent separate work in scalar code.
+ for (Value *V : TE.Scalars) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || (TE.hasCopyableElements() && TE.isCopyableElement(V)))
+ continue;
+ ++Count;
+ }
+ }
+ return Count;
+}
+
+unsigned BoUpSLP::getNumVectorInsts() const {
+ unsigned Count = 0;
+ SmallPtrSet<Value *, 4> GatherExtractSourceVecs;
+ for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+ const TreeEntry &TE = *Ptr;
+ if (DeletedNodes.contains(&TE))
+ continue;
+ if (TE.State == TreeEntry::CombinedVectorize)
+ continue;
+ if (TE.CombinedOp == TreeEntry::ReducedBitcast ||
+ TE.CombinedOp == TreeEntry::ReducedBitcastBSwap ||
+ TE.CombinedOp == TreeEntry::ReducedBitcastLoads ||
+ TE.CombinedOp == TreeEntry::ReducedBitcastBSwapLoads ||
+ TE.CombinedOp == TreeEntry::ReducedCmpBitcast)
+ continue;
+ bool IsGatherOrTransformed =
+ TE.isGather() || TransformedToGatherNodes.contains(&TE);
+ if (IsGatherOrTransformed) {
+ if (TE.hasState()) {
+ if (const TreeEntry *E =
+ getSameValuesTreeEntry(TE.getMainOp(), TE.Scalars);
+ E && E->getVectorFactor() == TE.getVectorFactor())
+ continue;
+ SmallVector<Value *> RevScalars(TE.Scalars.rbegin(), TE.Scalars.rend());
+ if (const TreeEntry *E =
+ getSameValuesTreeEntry(TE.getMainOp(), RevScalars);
+ E && E->getVectorFactor() == TE.getVectorFactor()) {
+ ++Count;
+ continue;
+ }
+ }
+ // ExtractElement gathers from the same source vector become a single
+ // shufflevector. Collect source vectors globally across all gather
+ // entries and count once at the end.
+ if (all_of(TE.Scalars,
+ IsaPred<ExtractElementInst, UndefValue, Constant>)) {
+ for (Value *V : TE.Scalars)
+ if (auto *EE = dyn_cast<ExtractElementInst>(V))
+ GatherExtractSourceVecs.insert(EE->getVectorOperand());
+ } else {
+ for (Value *V : TE.Scalars) {
+ if (!isConstant(V) && !isa<PoisonValue>(V))
----------------
bababuck wrote:
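`&& !isa<PoisonValue>(V)` is redundant here: a `PoisonValue` is a `Constant`, so `isConstant(V)` already covers it. Likewise, `UndefValue` is redundant in the `IsaPred<ExtractElementInst, UndefValue, Constant>` check on line 12895, because `UndefValue` is also a `Constant`. I'm not sure whether this was intentional for readability, though.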
https://github.com/llvm/llvm-project/pull/190414
More information about the llvm-commits mailing list