[llvm] 46080ab - Revert "[VPlan] First step towards VPlan cost modeling. (#92555)"
Arthur Eubanks via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 13 09:37:34 PDT 2024
Author: Arthur Eubanks
Date: 2024-06-13T16:37:21Z
New Revision: 46080abe9b136821eda2a1a27d8a13ceac349f8c
URL: https://github.com/llvm/llvm-project/commit/46080abe9b136821eda2a1a27d8a13ceac349f8c
DIFF: https://github.com/llvm/llvm-project/commit/46080abe9b136821eda2a1a27d8a13ceac349f8c.diff
LOG: Revert "[VPlan] First step towards VPlan cost modeling. (#92555)"
This reverts commit 00798354c553d48d27006a2b06a904bd6013e31b.
Causes crashes, see comments on https://github.com/llvm/llvm-project/pull/92555.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanValue.h
llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 6011e16076220..c03c278fcebe7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;
- /// Computes the cost of \p Plan for vectorization factor \p VF.
- ///
- /// The current implementation requires access to the
- /// LoopVectorizationLegality to handle inductions and reductions, which is
- /// why it is kept separate from the VPlan-only cost infrastructure.
- ///
- /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
- /// been retired.
- InstructionCost cost(VPlan &Plan, ElementCount VF) const;
-
public:
LoopVectorizationPlanner(
Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
/// Return the best VPlan for \p VF.
VPlan &getBestPlanFor(ElementCount VF) const;
- /// Return the most profitable plan and fix its VF to the most profitable one.
- VPlan &getBestPlan() const;
-
/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
///
@@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
ElementCount MinVF);
/// \return The most profitable vectorization factor and the cost of that VF.
- /// This method checks every VF in \p CandidateVFs. This is now only used to
- /// verify the decisions by the new VPlan-based cost-model and will be retired
- /// once the VPlan-based cost-model is stabilized.
+ /// This method checks every VF in \p CandidateVFs.
VectorizationFactor
selectVectorizationFactor(const ElementCountSet &CandidateVFs);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 469a9291f339f..37b8023e1fcf2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
cl::desc("A flag that overrides the target's max interleave factor for "
"vectorized loops."));
-cl::opt<unsigned> ForceTargetInstructionCost(
+static cl::opt<unsigned> ForceTargetInstructionCost(
"force-target-instruction-cost", cl::init(0), cl::Hidden,
cl::desc("A flag that overrides the target's expected cost for "
"an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
}
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+static unsigned getReciprocalPredBlockProb() { return 2; }
+
/// Returns "best known" trip count for the specified loop \p L as defined by
/// the following procedure:
/// 1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
/// \p VF is the vectorization factor chosen for the original loop.
bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
- /// Return the cost of instructions in an inloop reduction pattern, if I is
- /// part of that pattern.
- std::optional<InstructionCost>
- getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
- TTI::TargetCostKind CostKind) const;
-
- /// Returns the execution time cost of an instruction for a given vector
- /// width. Vector width of one means scalar.
- VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
private:
unsigned NumPredStores = 0;
@@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
+ /// Returns the execution time cost of an instruction for a given vector
+ /// width. Vector width of one means scalar.
+ VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
/// The cost-computation logic from getInstructionCost which provides
/// the vector type as an output parameter.
InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
Type *&VectorTy);
+ /// Return the cost of instructions in an inloop reduction pattern, if I is
+ /// part of that pattern.
+ std::optional<InstructionCost>
+ getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
+ TTI::TargetCostKind CostKind) const;
+
/// Calculate vectorization cost of memory instruction \p I.
InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
@@ -7280,10 +7288,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
if (!MaxFactors.hasVector())
return VectorizationFactor::Disabled();
- // Select the optimal vectorization factor according to the legacy cost-model.
- // This is now only used to verify the decisions by the new VPlan-based
- // cost-model and will be retired once the VPlan-based cost-model is
- // stabilized.
+ // Select the optimal vectorization factor.
VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
if (!hasPlanWithVF(VF.Width)) {
@@ -7294,166 +7299,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VF;
}
-InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
- ElementCount VF) const {
- return CM.getInstructionCost(UI, VF).first;
-}
-
-bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
- return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
- SkipCostComputation.contains(UI);
-}
-
-InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
- ElementCount VF) const {
- InstructionCost Cost = 0;
- LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
- VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
-
- // Cost modeling for inductions is inaccurate in the legacy cost model
- // compared to the recipes that are generated. To match here initially during
- // VPlan cost model bring up directly use the induction costs from the legacy
- // cost model. Note that we do this as pre-processing; the VPlan may not have
- // any recipes associated with the original induction increment instruction.
- // We precompute the cost of both induction increment instructions that are
- // represented by recipes and those that are not, to avoid distinguishing
- // between them here, and skip all recipes that represent induction increments
- // (the former case) later on, if they exist, to avoid counting them twice.
- // TODO: Switch to more accurate costing based on VPlan.
- for (const auto &[IV, _] : Legal->getInductionVars()) {
- Instruction *IVInc = cast<Instruction>(
- IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
- assert(!CostCtx.SkipCostComputation.contains(IVInc) &&
- "Same IV increment for multiple inductions?");
- CostCtx.SkipCostComputation.insert(IVInc);
- InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF);
- LLVM_DEBUG({
- dbgs() << "Cost of " << InductionCost << " for VF " << VF
- << ":\n induction increment " << *IVInc << "\n";
- IVInc->dump();
- });
- Cost += InductionCost;
- }
-
- /// Compute the cost of all exiting conditions of the loop using the legacy
- /// cost model. This is to match the legacy behavior, which adds the cost of
- /// all exit conditions. Note that this over-estimates the cost, as there will
- /// be a single condition to control the vector loop.
- SmallVector<BasicBlock *> Exiting;
- CM.TheLoop->getExitingBlocks(Exiting);
- // Add the cost of all exit conditions.
- for (BasicBlock *EB : Exiting) {
- auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
- if (!Term)
- continue;
- if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
- assert(!CostCtx.SkipCostComputation.contains(CondI) &&
- "Condition already skipped?");
- CostCtx.SkipCostComputation.insert(CondI);
- Cost += CostCtx.getLegacyCost(CondI, VF);
- }
- }
-
- // The legacy cost model has special logic to compute the cost of in-loop
- // reductions, which may be smaller than the sum of all instructions involved
- // in the reduction. For AnyOf reductions, VPlan codegen may remove the select
- // which the legacy cost model uses to assign cost. Pre-compute their costs
- // for now.
- // TODO: Switch to costing based on VPlan once the logic has been ported.
- for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
- if (!CM.isInLoopReduction(RedPhi) &&
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind()))
- continue;
-
- // AnyOf reduction codegen may remove the select. To match the legacy cost
- // model, pre-compute the cost for AnyOf reductions here.
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
- auto *Select = cast<SelectInst>(*find_if(
- RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
- assert(!CostCtx.SkipCostComputation.contains(Select) &&
- "reduction op visited multiple times");
- CostCtx.SkipCostComputation.insert(Select);
- auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
- LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
- << ":\n any-of reduction " << *Select << "\n");
- Cost += ReductionCost;
- continue;
- }
-
- const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
- SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
- ChainOps.end());
- // Also include the operands of instructions in the chain, as the cost-model
- // may mark extends as free.
- for (auto *ChainOp : ChainOps) {
- for (Value *Op : ChainOp->operands()) {
- if (auto *I = dyn_cast<Instruction>(Op))
- ChainOpsAndOperands.insert(I);
- }
- }
-
- // Pre-compute the cost for I, if it has a reduction pattern cost.
- for (Instruction *I : ChainOpsAndOperands) {
- auto ReductionCost = CM.getReductionPatternCost(
- I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
- if (!ReductionCost)
- continue;
-
- assert(!CostCtx.SkipCostComputation.contains(I) &&
- "reduction op visited multiple times");
- CostCtx.SkipCostComputation.insert(I);
- LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
- << ":\n in-loop reduction " << *I << "\n");
- Cost += *ReductionCost;
- }
- }
-
- // Now compute and add the VPlan-based cost.
- Cost += Plan.cost(VF, CostCtx);
- LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
- return Cost;
-}
-
-VPlan &LoopVectorizationPlanner::getBestPlan() const {
- // If there is a single VPlan with a single VF, return it directly.
- VPlan &FirstPlan = *VPlans[0];
- if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
- return FirstPlan;
-
- VPlan *BestPlan = &FirstPlan;
- ElementCount ScalarVF = ElementCount::getFixed(1);
- assert(hasPlanWithVF(ScalarVF) &&
- "More than a single plan/VF w/o any plan having scalar VF");
-
- InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
- VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);
-
- bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
- if (ForceVectorization) {
- // Ignore scalar width, because the user explicitly wants vectorization.
- // Initialize cost to max so that VF = 2 is, at least, chosen during cost
- // evaluation.
- BestFactor.Cost = InstructionCost::getMax();
- }
-
- for (auto &P : VPlans) {
- for (ElementCount VF : P->vectorFactors()) {
- if (VF.isScalar())
- continue;
- InstructionCost Cost = cost(*P, VF);
- VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
- if (isMoreProfitable(CurrentFactor, BestFactor)) {
- BestFactor = CurrentFactor;
- BestPlan = &*P;
- }
- }
- }
- BestPlan->setVF(BestFactor.Width);
- return *BestPlan;
-}
-
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
assert(count_if(VPlans,
[VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10312,15 +10157,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
- VPlan &BestPlan = LVP.getBestPlan();
- assert(size(BestPlan.vectorFactors()) == 1 &&
- "Plan should have a single VF");
- ElementCount Width = *BestPlan.vectorFactors().begin();
- LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
- << "\n");
- assert(VF.Width == Width &&
- "VPlan cost model and legacy cost model disagreed");
- LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ad6a718320830..f17be451e6846 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -747,64 +747,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Instance.reset();
}
-InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
- InstructionCost Cost = 0;
- for (VPRecipeBase &R : Recipes)
- Cost += R.cost(VF, Ctx);
- return Cost;
-}
-
-InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
- if (!isReplicator()) {
- InstructionCost Cost = 0;
- for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
- Cost += Block->cost(VF, Ctx);
- return Cost;
- }
-
- // Compute the cost of a replicate region. Replicating isn't supported for
- // scalable vectors, return an invalid cost for them.
- // TODO: Discard scalable VPlans with replicate recipes earlier after
- // construction.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
-
- // First compute the cost of the conditionally executed recipes, followed by
- // account for the branching cost, except if the mask is a header mask or
- // uniform condition.
- using namespace llvm::VPlanPatternMatch;
- VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
- InstructionCost ThenCost = Then->cost(VF, Ctx);
-
- // Note the cost estimates below closely match the current legacy cost model.
- auto *BOM = cast<VPBranchOnMaskRecipe>(&getEntryBasicBlock()->front());
- VPValue *Cond = BOM->getOperand(0);
-
- // Check if Cond is a uniform compare or a header mask and don't account for
- // branching costs. A uniform condition corresponding to a single branch per
- // VF, and the header mask will always be true except in the last iteration.
- if (vputils::isUniformBoolean(Cond) ||
- vputils::isHeaderMask(Cond, *getPlan()))
- return ThenCost;
-
- // For the scalar case, we may not always execute the original predicated
- // block, Thus, scale the block's cost by the probability of executing it.
- if (VF.isScalar())
- return ThenCost / getReciprocalPredBlockProb();
-
- // Add the cost for branches around scalarized and predicated blocks.
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
- auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF);
- auto FixedVF = VF.getFixedValue(); // Known to be non scalable.
- InstructionCost Cost = ThenCost;
- Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF),
- /*Insert*/ false, /*Extract*/ true,
- CostKind);
- Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF;
- return Cost;
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -978,12 +920,6 @@ void VPlan::execute(VPTransformState *State) {
"DT not preserved correctly");
}
-InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
- // For now only return the cost of the vector loop region, ignoring any other
- // blocks, like the preheader or middle blocks.
- return getVectorLoopRegion()->cost(VF, Ctx);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPlan::printLiveIns(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
@@ -1518,25 +1454,3 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
Plan.addSCEVExpansion(Expr, Expanded);
return Expanded;
}
-
-bool vputils::isUniformBoolean(VPValue *Cond) {
- if (match(Cond, m_Not(m_VPValue())))
- Cond = Cond->getDefiningRecipe()->getOperand(0);
- auto *R = Cond->getDefiningRecipe();
- if (!R)
- return true;
- // TODO: match additional patterns preserving uniformity of booleans, e.g.,
- // AND/OR/etc.
- return match(R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
- all_of(R->operands(), [](VPValue *Op) {
- return vputils::isUniformAfterVectorization(Op);
- });
-}
-
-bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
- VPValue *Op;
- return isa<VPActiveLaneMaskPHIRecipe>(V) ||
- match(V, m_ActiveLaneMask(m_VPValue(), m_VPValue())) ||
- (match(V, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue(Op))) &&
- Op == Plan.getOrCreateBackedgeTakenCount());
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4c0972e517263..5bb88e4a57dc3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -42,7 +42,6 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/InstructionCost.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -65,11 +64,8 @@ class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
-class LoopVectorizationCostModel;
class LoopVersioning;
-struct VPCostContext;
-
namespace Intrinsic {
typedef unsigned ID;
}
@@ -86,14 +82,6 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
Loop *CurLoop = nullptr);
-/// A helper function that returns the reciprocal of the block probability of
-/// predicated blocks. If we return X, we are assuming the predicated block
-/// will execute once for every X iterations of the loop header.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned getReciprocalPredBlockProb() { return 2; }
-
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
@@ -636,9 +624,6 @@ class VPBlockBase {
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(VPTransformState *State) = 0;
- /// Return the cost of the block.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
-
/// Delete all blocks reachable from a given VPBlockBase, inclusive.
static void deleteCFG(VPBlockBase *Entry);
@@ -722,27 +707,6 @@ class VPLiveOut : public VPUser {
#endif
};
-/// Struct to hold various analysis needed for cost computations.
-struct VPCostContext {
- const TargetTransformInfo &TTI;
- VPTypeAnalysis Types;
- LLVMContext &LLVMCtx;
- LoopVectorizationCostModel &CM;
- SmallPtrSet<Instruction *, 8> SkipCostComputation;
-
- VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
- LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
- : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
-
- /// Return the cost for \p UI with \p VF using the legacy cost model as
- /// fallback until computing the cost of all recipes migrates to VPlan.
- InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
-
- /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
- /// has already been pre-computed.
- bool skipCostComputation(Instruction *UI, bool IsVector) const;
-};
-
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -782,11 +746,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// this VPRecipe, thereby "executing" the VPlan.
virtual void execute(VPTransformState &State) = 0;
- /// Return the cost of this recipe, taking into account if the cost
- /// computation should be skipped and the ForceTargetInstructionCost flag.
- /// Also takes care of printing the cost for debugging.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
/// Insert an unlinked recipe into a basic block immediately before
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
@@ -847,11 +806,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
/// Returns the debug location of the recipe.
DebugLoc getDebugLoc() const { return DL; }
-
-protected:
- /// Compute the cost of this recipe using the legacy cost model and the
- /// underlying instructions.
- InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
};
// Helper macro to define common classof implementations for recipes.
@@ -1427,6 +1381,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
+ assert(UI.getType() == ResultTy &&
+ "result type of underlying cast doesn't match");
}
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
@@ -2140,8 +2096,6 @@ class VPInterleaveRecipe : public VPRecipeBase {
"Op must be an operand of the recipe");
return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
}
-
- Instruction *getInsertPos() const { return IG->getInsertPos(); }
};
/// A recipe to represent inloop reduction operations, performing a reduction on
@@ -2956,9 +2910,6 @@ class VPBasicBlock : public VPBlockBase {
/// this VPBasicBlock, thereby "executing" the VPlan.
void execute(VPTransformState *State) override;
- /// Return the cost of this VPBasicBlock.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
/// Return the position of the first non-phi node recipe in the block.
iterator getFirstNonPhi();
@@ -3133,9 +3084,6 @@ class VPRegionBlock : public VPBlockBase {
/// this VPRegionBlock, thereby "executing" the VPlan.
void execute(VPTransformState *State) override;
- // Return the cost of this region.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
void dropAllReferences(VPValue *NewValue) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3255,9 +3203,6 @@ class VPlan {
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
- /// Return the cost of this plan.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
@@ -3301,11 +3246,6 @@ class VPlan {
return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
}
- iterator_range<SmallSetVector<ElementCount, 2>::iterator>
- vectorFactors() const {
- return {VFs.begin(), VFs.end()};
- }
-
bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
@@ -3725,13 +3665,6 @@ inline bool isUniformAfterVectorization(VPValue *VPV) {
return VPI->isVectorToScalar();
return false;
}
-
-/// Return true if \p Cond is a uniform boolean.
-bool isUniformBoolean(VPValue *Cond);
-
-/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(VPValue *V, VPlan &Plan);
-
} // end namespace vputils
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b491ea5a18b54..7a482455473e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -39,7 +39,6 @@ using VectorParts = SmallVector<Value *, 2>;
namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
-extern cl::opt<unsigned> ForceTargetInstructionCost;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
@@ -256,40 +255,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}
-InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
- if (auto *S = dyn_cast<VPSingleDefRecipe>(this)) {
- auto *UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
- if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
- return 0;
- }
-
- InstructionCost RecipeCost = computeCost(VF, Ctx);
- if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
- RecipeCost.isValid())
- RecipeCost = InstructionCost(ForceTargetInstructionCost);
-
- LLVM_DEBUG({
- dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
- dump();
- });
- return RecipeCost;
-}
-
-InstructionCost VPRecipeBase::computeCost(ElementCount VF,
- VPCostContext &Ctx) const {
- // Compute the cost for the recipe falling back to the legacy cost model using
- // the underlying instruction. If there is no underlying instruction, returns
- // 0.
- Instruction *UI = nullptr;
- if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
- UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
- else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
- UI = IG->getInsertPos();
- else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
- UI = &WidenMem->getIngredient();
- return UI ? Ctx.getLegacyCost(UI, VF) : 0;
-}
-
FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
assert(OpType == OperationType::FPMathOp &&
"recipe doesn't have fast math flags");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 082a442bf399d..8ec67eb2f54bd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -999,10 +999,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
: Instruction::ZExt;
auto *VPC =
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
- // UnderlyingExt has distinct return type, used to retain legacy cost.
- VPC->setUnderlyingValue(UnderlyingExt);
- }
VPC->insertBefore(&R);
Trunc->replaceAllUsesWith(VPC);
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
@@ -1522,7 +1518,6 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
VPInstruction *New = Builder.createOverflowingOp(
Instruction::Add, {A, B}, {false, false},
RecWithFlags->getDebugLoc());
- New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
RecWithFlags->replaceAllUsesWith(New);
RecWithFlags->eraseFromParent();
CurRec = New;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index fa6a65ff2f3ad..8d945f6f2b8ea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -74,7 +74,8 @@ class VPValue {
public:
/// Return the underlying Value attached to this VPValue.
- Value *getUnderlyingValue() const { return UnderlyingVal; }
+ Value *getUnderlyingValue() { return UnderlyingVal; }
+ const Value *getUnderlyingValue() const { return UnderlyingVal; }
/// An enumeration for keeping track of the concrete subclass of VPValue that
/// are actually instantiated.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 41879f3ebef5a..b5aa96eb23f5e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -119,7 +119,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK: LV: Interleaving disabled by the pass manager
; CHECK-NEXT: LV: Vectorizing: innermost loop.
@@ -261,7 +260,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK: LV: Interleaving disabled by the pass manager
; CHECK-NEXT: LV: Vectorizing: innermost loop.
More information about the llvm-commits
mailing list