[llvm] [VPlan] First step towards VPlan cost modeling. (PR #67934)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 13:02:43 PDT 2024
================
@@ -7395,6 +7396,177 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VF;
}
+static InstructionCost
+computeCostForRecipe(VPRecipeBase *R, ElementCount VF,
+ SmallPtrSetImpl<Instruction *> &SeenUI,
+ LoopVectorizationCostModel &CM,
+ const TargetTransformInfo &TTI, VPCostContext CostCtx) {
+ Instruction *UI = nullptr;
+ if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
+ UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
+ if (UI && (CM.VecValuesToIgnore.contains(UI) || !SeenUI.insert(UI).second))
+ return 0;
+
+ InstructionCost RecipeCost = R->computeCost(VF, CostCtx);
+ if (!RecipeCost.isValid()) {
+ if (auto *IG = dyn_cast<VPInterleaveRecipe>(R)) {
+ RecipeCost = CM.getInstructionCost(IG->getInsertPos(), VF).first;
+ } else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R)) {
+ RecipeCost = CM.getInstructionCost(&WidenMem->getIngredient(), VF).first;
+ } else if (UI) {
+ RecipeCost = CM.getInstructionCost(UI, VF).first;
+ } else
+ return 0;
+ }
+ if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
+ RecipeCost.isValid())
+ RecipeCost = InstructionCost(ForceTargetInstructionCost);
+
+ LLVM_DEBUG({
+ dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
+ R->dump();
+ });
+ return RecipeCost;
+}
+
+static InstructionCost computeCostForReplicatorRegion(
+ VPRegionBlock *Region, ElementCount VF,
+ SmallPtrSetImpl<Instruction *> &SeenUI, LoopVectorizationCostModel &CM,
+ const TargetTransformInfo &TTI, LLVMContext &Ctx, VPCostContext CostCtx) {
+ using namespace llvm::VPlanPatternMatch;
+ InstructionCost RegionCost = 0;
+ assert(Region->isReplicator() &&
+ "can only compute cost for a replicator region");
+ VPBasicBlock *Then =
+ cast<VPBasicBlock>(Region->getEntry()->getSuccessors()[0]);
+ for (VPRecipeBase &R : *Then)
+ RegionCost += computeCostForRecipe(&R, VF, SeenUI, CM, CM.TTI, CostCtx);
+
+ // Note the cost estimates below closely match the current legacy cost model.
+ auto *BOM =
+ cast<VPBranchOnMaskRecipe>(&Region->getEntryBasicBlock()->front());
+ VPValue *Cond = BOM->getOperand(0);
+
+ // Check if Cond is a uniform compare.
+ auto IsUniformCompare = [Cond]() {
+ VPValue *Op = Cond;
+ if (match(Op, m_Not(m_VPValue())))
+ Op = Op->getDefiningRecipe()->getOperand(0);
+ auto *R = Op->getDefiningRecipe();
+ if (!R)
+ return true;
+ if (!match(R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())))
+ return false;
+ return all_of(R->operands(), [](VPValue *Op) {
+ return vputils::isUniformAfterVectorization(Op);
+ });
+ }();
+ bool IsHeaderMaskOrUniformCond =
+ IsUniformCompare ||
+ match(Cond, m_ActiveLaneMask(m_VPValue(), m_VPValue())) ||
+ match(Cond, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) ||
+ isa<VPActiveLaneMaskPHIRecipe>(Cond);
+ if (IsHeaderMaskOrUniformCond || VF.isScalable())
+ return RegionCost;
+
+ // For the scalar case, we may not always execute the original predicated
+ // block, Thus, scale the block's cost by the probability of executing it.
+ // blockNeedsPredication from Legal is used so as to not include all blocks in
+ // tail folded loops.
+ if (VF.isScalar())
+ return RegionCost / getReciprocalPredBlockProb();
+
+ // Add the cost for branches around scalarized and predicated blocks.
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
+ return RegionCost +
+ TTI.getScalarizationOverhead(
+ Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()),
+ /*Insert*/ false, /*Extract*/ true, CostKind) +
+ (TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue());
+}
+
+InstructionCost LoopVectorizationPlanner::computeCost(VPlan &Plan,
+ ElementCount VF) {
+ InstructionCost Cost = 0;
+ SmallPtrSet<Instruction *, 8> SeenUI;
+ LLVMContext &Ctx = OrigLoop->getHeader()->getContext();
+ VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), Ctx);
+
+ // Cost modeling for inductions is inaccurate in the legacy cost model
+ // compared to the recipes that are generated. To match here initially during
+ // VPlan cost model bring up directly use the induction costs from the legacy
+ // cost model and skip induction recipes.
+ for (const auto &[IV, _] : Legal->getInductionVars()) {
+ Instruction *IVInc = cast<Instruction>(
+ IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+ InstructionCost RecipeCost = CM.getInstructionCost(IVInc, VF).first;
----------------
ayalz wrote:
The use of "Recipe" may be confusing, as no recipes are involved here; IVInc is an underlying Instruction.
https://github.com/llvm/llvm-project/pull/67934
More information about the llvm-commits
mailing list