[llvm] b2d12d6 - [VPlan] Extend getSCEVForVPV, use to compute VPReplicateRecipe cost. (#161276)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 30 15:46:24 PDT 2025
Author: Florian Hahn
Date: 2025-10-30T15:46:19-07:00
New Revision: b2d12d6f2ba60a2f5c76cf535e2b215443a9c221
URL: https://github.com/llvm/llvm-project/commit/b2d12d6f2ba60a2f5c76cf535e2b215443a9c221
DIFF: https://github.com/llvm/llvm-project/commit/b2d12d6f2ba60a2f5c76cf535e2b215443a9c221.diff
LOG: [VPlan] Extend getSCEVForVPV, use to compute VPReplicateRecipe cost. (#161276)
Update getSCEVExprForVPValue to handle more complex expressions, to use
it in VPReplicateRecipe::computeCost.
In particular, it supports constructing SCEV expressions for
GetElementPtr VPReplicateRecipes, with operands that are
VPScalarIVStepsRecipe, VPDerivedIVRecipe and VPCanonicalIVRecipe. If we
hit a sub-expression we don't support yet, we return
SCEVCouldNotCompute.
Note that the SCEV expression is valid for VF = 1: we only support
constructing AddRecs for VPCanonicalIVRecipe, which is an AddRec
starting at 0 and stepping by 1. The returned SCEV expressions could be
converted to a VF specific one, by rewriting the AddRecs to ones with
the appropriate step.
Note that the logic for constructing SCEVs for GetElementPtr was
directly ported from ScalarEvolution.cpp.
Another thing to note is that we construct SCEV expression purely by
looking at the operation of the recipe and its translated operands, w/o
accessing the underlying IR (the exception being getting the source
element type for GEPs).
PR: https://github.com/llvm/llvm-project/pull/161276
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanHelpers.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
llvm/lib/Transforms/Vectorize/VPlanUtils.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 505fb435e91e6..25bf49db0e073 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
continue;
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(*Plan, VF, CostCtx);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
// Add on other costs that are modelled in VPlan, but not in the legacy
// cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
assert(VectorRegion && "Expected to have a vector region!");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6876,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
- VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
+ OrigLoop);
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
// Now compute and add the VPlan-based cost.
@@ -7110,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// case, don't trigger the assertion, as the extra simplifications may cause a
// different VF to be picked by the VPlan-based cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
// legacy cost model doesn't properly model costs for such loops.
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
+ !Legal->getLAI()->getSymbolicStrides().empty() ||
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
CostCtx, OrigLoop,
BestFactor.Width) ||
@@ -8441,7 +8443,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}
@@ -9911,7 +9913,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
- CM.CostKind, *CM.PSE.getSE());
+ CM.CostKind, *CM.PSE.getSE(), L);
if (!ForceVectorization &&
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 2aaabd9ebdd04..965426f86ff21 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -350,13 +350,14 @@ struct VPCostContext {
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
ScalarEvolution &SE;
+ const Loop *L;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
const VPlan &Plan, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind,
- ScalarEvolution &SE)
+ ScalarEvolution &SE, const Loop *L)
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
- CostKind(CostKind), SE(SE) {}
+ CostKind(CostKind), SE(SE), L(L) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a63c802047ea..bde62dd6dd4bc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3167,26 +3167,30 @@ bool VPReplicateRecipe::shouldPack() const {
});
}
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
+/// Returns a SCEV expression for \p Ptr if it is a pointer computation for
+/// which the legacy cost model computes a SCEV expression when computing the
+/// address cost. Computing SCEVs for VPValues is incomplete and returns
+/// SCEVCouldNotCompute in cases the legacy cost model can compute SCEVs. In
+/// those cases we fall back to the legacy cost model. Otherwise return nullptr.
+static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE,
+ const Loop *L) {
auto *PtrR = Ptr->getDefiningRecipe();
if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
Instruction::GetElementPtr) ||
isa<VPWidenGEPRecipe>(PtrR) ||
match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
- return false;
+ return nullptr;
// We are looking for a GEP where all indices are either loop invariant or
// inductions.
for (VPValue *Opd : drop_begin(PtrR->operands())) {
if (!Opd->isDefinedOutsideLoopRegions() &&
!isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
- return false;
+ return nullptr;
}
- return true;
+ return vputils::getSCEVExprForVPValue(Ptr, SE, L);
}
/// Returns true if \p V is used as part of the address of another load or
@@ -3354,9 +3358,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
bool IsLoad = UI->getOpcode() == Instruction::Load;
const VPValue *PtrOp = getOperand(!IsLoad);
- // TODO: Handle cases where we need to pass a SCEV to
- // getAddressComputationCost.
- if (shouldUseAddressAccessSCEV(PtrOp))
+ const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L);
+ if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
break;
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
@@ -3374,7 +3377,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
- nullptr, Ctx.CostKind);
+ PtrSCEV, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4db92e7def3ed..54348c6e34488 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -75,7 +75,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
B == Plan.getBackedgeTakenCount();
}
-const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
+const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
+ ScalarEvolution &SE, const Loop *L) {
if (V->isLiveIn()) {
if (Value *LiveIn = V->getLiveInIRValue())
return SE.getSCEV(LiveIn);
@@ -86,6 +87,52 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
.Case<VPExpandSCEVRecipe>(
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
+ .Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) {
+ if (!L)
+ return SE.getCouldNotCompute();
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
+ SCEV::FlagAnyWrap);
+ })
+ .Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) {
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L);
+ if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>))
+ return SE.getCouldNotCompute();
+
+ return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()),
+ SE.getMulExpr(IV, SE.getTruncateOrSignExtend(
+ Scale, IV->getType())));
+ })
+ .Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step))
+ return SE.getCouldNotCompute();
+ return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()),
+ Step);
+ })
+ .Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) {
+ if (R->getOpcode() != Instruction::GetElementPtr)
+ return SE.getCouldNotCompute();
+
+ const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ if (isa<SCEVCouldNotCompute>(Base))
+ return SE.getCouldNotCompute();
+
+ SmallVector<const SCEV *> IndexExprs;
+ for (VPValue *Index : drop_begin(R->operands())) {
+ const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L);
+ if (isa<SCEVCouldNotCompute>(IndexExpr))
+ return SE.getCouldNotCompute();
+ IndexExprs.push_back(IndexExpr);
+ }
+
+ Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr())
+ ->getSourceElementType();
+ return SE.getGEPExpr(Base, IndexExprs, SrcElementTy);
+ })
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 37cd413da9079..c21a0e70c1392 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr);
/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
/// SCEV expression could be constructed.
-const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE,
+ const Loop *L = nullptr);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
More information about the llvm-commits
mailing list