[llvm] [LV] Optimize VPWidenIntOrFpInductionRecipe for known TC (PR #118828)
Hari Limaye via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 06:44:55 PST 2025
================
@@ -975,11 +978,74 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
}
}
-void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
- unsigned BestUF,
- PredicatedScalarEvolution &PSE) {
- assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
- assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
+/// Optimize the width of vector induction variables in \p Plan based on a known
+/// constant Trip Count, \p BestVF and \p BestUF.
+static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
+ ElementCount BestVF,
+ unsigned BestUF) {
+ // Only proceed if we have not completely removed the vector region.
+ if (!Plan.getVectorLoopRegion())
+ return false;
+
+ auto *TC = dyn_cast_if_present<ConstantInt>(
+ Plan.getTripCount()->getUnderlyingValue());
+ if (!TC || !BestVF.isFixed())
+ return false;
+
+ // Calculate the widest type required for known TC, VF and UF.
+ auto ComputeBitWidth = [](APInt TC, uint64_t Align) {
+ auto AlignedTC =
+ Align * APIntOps::RoundingUDiv(TC, APInt(TC.getBitWidth(), Align),
+ APInt::Rounding::UP);
+ auto MaxVal = AlignedTC - 1;
+ return std::max<unsigned>(PowerOf2Ceil(MaxVal.getActiveBits()), 8);
+ };
+ unsigned NewBitWidth =
+ ComputeBitWidth(TC->getValue(), BestVF.getKnownMinValue() * BestUF);
+
+ LLVMContext &Ctx = Plan.getCanonicalIV()->getScalarType()->getContext();
+ auto *NewIVTy = IntegerType::get(Ctx, NewBitWidth);
+
+ bool MadeChange = false;
+
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+ auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+ if (!WideIV || !WideIV->isCanonical() ||
+ WideIV->hasMoreThanOneUniqueUser() ||
+ NewIVTy == WideIV->getScalarType())
+ continue;
+
+ // Currently only handle cases where the single user is a header-mask
+ // comparison with the backedge-taken-count.
+ using namespace VPlanPatternMatch;
+ if (!match(*WideIV->user_begin(),
+ m_Binary<Instruction::ICmp>(
+ m_Specific(WideIV),
+ m_Specific(Plan.getOrCreateBackedgeTakenCount()))))
+ continue;
+
+ // Update IV operands and comparison bound to use new narrower type.
+ auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
+ WideIV->setStartValue(NewStart);
+ auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
+ WideIV->setStepValue(NewStep);
+
+ auto *NewBTC = new VPWidenCastRecipe(
+ Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
+ Plan.getVectorPreheader()->appendRecipe(NewBTC);
+ auto *Cmp = dyn_cast<VPInstruction>(*WideIV->user_begin());
----------------
hazzlim wrote:
Good point - done.
https://github.com/llvm/llvm-project/pull/118828
More information about the llvm-commits mailing list