[llvm] [SLP] Check for extracts, being replaced by original scalars, for user nodes (PR #149572)
Gaëtan Bossu via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 02:01:59 PDT 2025
================
@@ -9149,6 +9163,81 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}
+bool BoUpSLP::isProfitableToVectorizeWithNonVecUsers(
+ const InstructionsState &S, const EdgeInfo &UserTreeIdx,
+ ArrayRef<Value *> Scalars, ArrayRef<int> ScalarsMask) {
+ assert(S && "Expected valid instructions state.");
+ // Loads, extracts and geps are immediately scalarizable, so no need to check.
+ if (S.getOpcode() == Instruction::Load ||
+ S.getOpcode() == Instruction::ExtractElement ||
+ S.getOpcode() == Instruction::GetElementPtr)
+ return true;
+ // Check only vectorized users, others scalarized (potentially, at least)
+ // already.
+ if (!UserTreeIdx.UserTE || UserTreeIdx.UserTE->isGather() ||
+ UserTreeIdx.UserTE->State == TreeEntry::SplitVectorize)
+ return true;
+ // PHI nodes may have cyclic deps, so cannot check here.
+ if (UserTreeIdx.UserTE->getOpcode() == Instruction::PHI)
+ return true;
+ // Do not check root reduction nodes, they do not have non-vectorized users.
+ if (UserIgnoreList && UserTreeIdx.UserTE->Idx == 0)
+ return true;
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ ArrayRef<Value *> VL = UserTreeIdx.UserTE->Scalars;
+ Type *UserScalarTy = getValueType(VL.front());
+ if (!isValidElementType(UserScalarTy))
+ return true;
+ Type *ScalarTy = getValueType(Scalars.front());
+ if (!isValidElementType(ScalarTy))
+ return true;
+ // Ignore subvectors extracts.
+ if (UserScalarTy->isVectorTy())
+ return true;
+ auto *UserVecTy =
+ getWidenedType(UserScalarTy, UserTreeIdx.UserTE->getVectorFactor());
+ APInt DemandedElts = APInt::getZero(UserTreeIdx.UserTE->getVectorFactor());
+ // Check the external uses and check, if vector node + extracts is not
+ // profitable for the vectorization.
+ InstructionCost UserScalarsCost = 0;
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ continue;
+ if (areAllUsersVectorized(I, UserIgnoreList))
+ continue;
+ DemandedElts.setBit(UserTreeIdx.UserTE->findLaneForValue(V));
+ UserScalarsCost += TTI->getInstructionCost(I, CostKind);
+ }
+ // No non-vectorized users - success.
+ if (DemandedElts.isZero())
+ return true;
+ // If extracts are cheaper than the original scalars - success.
+ InstructionCost ExtractCost =
+ ::getScalarizationOverhead(*TTI, UserScalarTy, UserVecTy, DemandedElts,
+ /*Insert=*/false, /*Extract=*/true, CostKind);
+ if (ExtractCost <= UserScalarsCost)
+ return true;
+ SmallPtrSet<Value *, 4> CheckedExtracts;
+ InstructionCost NodeCost =
+ UserTreeIdx.UserTE->State == TreeEntry::CombinedVectorize
+ ? InstructionCost(0)
+ : getEntryCost(UserTreeIdx.UserTE, {}, CheckedExtracts);
+ // The node is profitable for vectorization - success.
+ if (ExtractCost + NodeCost <= -SLPCostThreshold)
+ return true;
----------------
gbossu wrote:
But the code is checking the cost of extracting vs keeping instructions in `UserVL`, not `VL`. The fate of `UserVL` has already been decided, and as you said, you're not going to change it here.
Shouldn't that code instead check the users of `VL`, especially those that are not in `UserVL` and that we consider "external"?
https://github.com/llvm/llvm-project/pull/149572
More information about the llvm-commits
mailing list