[llvm] [SLP] Reject 2-element vectorization when vector inst count exceeds scalar (PR #190414)

Sun Apr 5 23:30:21 PDT 2026

================
@@ -12817,6 +12829,112 @@ bool BoUpSLP::areAllUsersVectorized(
          });
 }
 
+unsigned BoUpSLP::getNumScalarInsts() const {
+  unsigned Count = 0;
+  for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+    const TreeEntry &TE = *Ptr;
+    if (DeletedNodes.contains(&TE))
+      continue;
+    if (TE.isGather() || TransformedToGatherNodes.contains(&TE)) {
+      // Count instruction scalars in gathers — they exist in the scalar
+      // code regardless of vectorization. ExtractElement instructions
+      // become free when the vector input is used directly.
+      for (Value *V : TE.Scalars)
+        if (isa<Instruction>(V))
+          ++Count;
+      continue;
+    }
+    // Each vectorize entry represents a bundle of scalar instructions.
+    // Count per-entry without cross-entry deduplication, since shared
+    // scalars across entries still represent separate work in scalar code.
+    for (Value *V : TE.Scalars) {
+      auto *I = dyn_cast<Instruction>(V);
+      if (!I || (TE.hasCopyableElements() && TE.isCopyableElement(V)))
+        continue;
+      ++Count;
+    }
+  }
+  return Count;
+}
+
+unsigned BoUpSLP::getNumVectorInsts() const {
+  unsigned Count = 0;
+  SmallPtrSet<Value *, 4> GatherExtractSourceVecs;
+  for (const std::unique_ptr<TreeEntry> &Ptr : VectorizableTree) {
+    const TreeEntry &TE = *Ptr;
+    if (DeletedNodes.contains(&TE))
+      continue;
+    if (TE.State == TreeEntry::CombinedVectorize)
+      continue;
+    if (TE.CombinedOp == TreeEntry::ReducedBitcast ||
+        TE.CombinedOp == TreeEntry::ReducedBitcastBSwap ||
+        TE.CombinedOp == TreeEntry::ReducedBitcastLoads ||
+        TE.CombinedOp == TreeEntry::ReducedBitcastBSwapLoads ||
+        TE.CombinedOp == TreeEntry::ReducedCmpBitcast)
+      continue;
+    bool IsGatherOrTransformed =
+        TE.isGather() || TransformedToGatherNodes.contains(&TE);
+    if (IsGatherOrTransformed) {
+      if (TE.hasState()) {
+        if (const TreeEntry *E =
+                getSameValuesTreeEntry(TE.getMainOp(), TE.Scalars);
+            E && E->getVectorFactor() == TE.getVectorFactor())
+          continue;
+        SmallVector<Value *> RevScalars(TE.Scalars.rbegin(), TE.Scalars.rend());
+        if (const TreeEntry *E =
+                getSameValuesTreeEntry(TE.getMainOp(), RevScalars);
+            E && E->getVectorFactor() == TE.getVectorFactor()) {
+          ++Count;
+          continue;
+        }
+      }
+      // ExtractElement gathers from the same source vector become a single
+      // shufflevector. Collect source vectors globally across all gather
+      // entries and count once at the end.
+      if (all_of(TE.Scalars,
+                 IsaPred<ExtractElementInst, UndefValue, Constant>)) {
+        for (Value *V : TE.Scalars)
+          if (auto *EE = dyn_cast<ExtractElementInst>(V))
+            GatherExtractSourceVecs.insert(EE->getVectorOperand());
+      } else {
+        for (Value *V : TE.Scalars) {
+          if (!isConstant(V) && !isa<PoisonValue>(V))
----------------
bababuck wrote:

`&& !isa<PoisonValue>` is redundant (so is `UndefValue` on line 12895). Not sure if this was intentional for readability though.

https://github.com/llvm/llvm-project/pull/190414