[llvm] [SLP] Check for extracts, being replaced by original scalars, for user nodes (PR #149572)

Tue Aug 5 03:17:11 PDT 2025

================
@@ -9151,6 +9165,93 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
   return {IntrinsicCost, LibCost};
 }
 
+/// Returns true if extracting \p DemandedElts passes any cost check below.
+static bool
+areExtractsCheaperThanScalars(TargetTransformInfo &TTI, Type *UserScalarTy,
+                              VectorType *UserVecTy, const APInt &DemandedElts,
+                              const InstructionCost UserScalarsCost,
+                              Type *ScalarTy, unsigned VF, ArrayRef<int> Mask,
+                              InstructionCost UserEntryCost) {
+  constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  // Extracts cost no more than the original user scalars - success.
+  InstructionCost ExtractCost =
+      ::getScalarizationOverhead(TTI, UserScalarTy, UserVecTy, DemandedElts,
+                                 /*Insert=*/false, /*Extract=*/true, CostKind);
+  if (ExtractCost <= UserScalarsCost)
+    return true;
+  // Extracts fit within the user entry cost (node is profitable) - success.
+  if (ExtractCost <= UserEntryCost)
+    return true;
+  auto *VecTy = getWidenedType(ScalarTy, VF);
+  InstructionCost ScalarsCost =
+      ::getScalarizationOverhead(TTI, ScalarTy, VecTy, APInt::getAllOnes(VF),
+                                 /*Insert=*/true, /*Extract=*/false, CostKind);
+  if (!Mask.empty()) // Also charge a single-source permute for the mask.
+    ScalarsCost +=
+        getShuffleCost(TTI, TTI::SK_PermuteSingleSrc, VecTy, Mask, CostKind);
+  return ExtractCost < UserScalarsCost + ScalarsCost; // Strict, unlike above.
+}
+
+bool BoUpSLP::isProfitableToVectorizeWithNonVecUsers(
+    const InstructionsState &S, const EdgeInfo &UserTreeIdx,
+    ArrayRef<Value *> VL, ArrayRef<int> Mask) {
+  assert(S && "Expected valid instructions state.");
+  // Loads, extracts and geps are immediately scalarizable, so no need to check.
+  if (S.getOpcode() == Instruction::Load ||
+      S.getOpcode() == Instruction::ExtractElement ||
+      S.getOpcode() == Instruction::GetElementPtr)
+    return true;
----------------
gbossu wrote:

But the cost of keeping a scalar load might be pretty high compared to just extracting it. What would happen if you remove this early exit and run through the cost checks below just like for other opcodes?

https://github.com/llvm/llvm-project/pull/149572