[llvm] [LV] Compute register usage for interleaving on VPlan. (PR #126437)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 07:11:54 PDT 2025
================
@@ -4885,8 +4886,232 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
}
+/// Estimate the register usage for \p Plan and vectorization factors in \p VFs.
+/// Returns the register usage for each VF in \p VFs.
+static SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
+calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
+ const TargetTransformInfo &TTI) {
+ // This function calculates the register usage by measuring the highest number
+ // of values that are alive at a single location. Obviously, this is a very
+ // rough estimation. We scan the loop in a topological order in order and
+ // assign a number to each recipe. We use RPO to ensure that defs are
+ // met before their users. We assume that each recipe that has in-loop
+ // users starts an interval. We record every time that an in-loop value is
+ // used, so we have a list of the first and last occurrences of each
+ // recipe. Next, we transpose this data structure into a multi map that
+ // holds the list of intervals that *end* at a specific location. This multi
+ // map allows us to perform a linear search. We scan the instructions linearly
+ // and record each time that a new interval starts, by placing it in a set.
+ // If we find this value in the multi-map then we remove it from the set.
+ // The max register usage is the maximum size of the set.
+ // We also search for instructions that are defined outside the loop, but are
+ // used inside the loop. We need this number separately from the max-interval
+ // usage number because when we unroll, loop-invariant values do not take
+ // more register.
+ LoopVectorizationCostModel::RegisterUsage RU;
+
+ // Each 'key' in the map opens a new interval. The values
+ // of the map are the index of the 'last seen' usage of the
+ // recipe that is the key.
+ using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;
+
+ // Maps indices to recipes.
+ SmallVector<VPRecipeBase *, 64> Idx2Recipe;
+ // Marks the end of each interval.
+ IntervalMap EndPoint;
+ // Saves the list of recipe indices that are used in the loop.
+ SmallPtrSet<VPRecipeBase *, 8> Ends;
+ // Saves the list of values that are used in the loop but are defined outside
+ // the loop (not including non-recipe values such as arguments and
+ // constants).
+ SmallSetVector<VPValue *, 8> LoopInvariants;
+ LoopInvariants.insert(&Plan.getVectorTripCount());
+
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getVectorLoopRegion());
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ if (!VPBB->getParent())
+ break;
+ for (VPRecipeBase &R : *VPBB) {
+ Idx2Recipe.push_back(&R);
+
+ // Save the end location of each USE.
+ for (VPValue *U : R.operands()) {
+ auto *DefR = U->getDefiningRecipe();
+
+ // Ignore non-recipe values such as arguments, constants, etc.
+ // FIXME: Might need some motivation why these values are ignored. If
+ // for example an argument is used inside the loop it will increase the
+ // register pressure (so shouldn't we add it to LoopInvariants).
+ if (!DefR && (!U->getLiveInIRValue() ||
+ !isa<Instruction>(U->getLiveInIRValue())))
+ continue;
+
+ // If this recipe is outside the loop then record it and continue.
+ if (!DefR) {
+ LoopInvariants.insert(U);
+ continue;
+ }
+
+ // Overwrite previous end points.
+ EndPoint[DefR] = Idx2Recipe.size();
+ Ends.insert(DefR);
+ }
+ }
+ if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
+ // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
+ // exiting block, where their increment will get materialized eventually.
+ for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ EndPoint[&R] = Idx2Recipe.size();
+ Ends.insert(&R);
+ }
+ }
+ }
+ }
+
+ // Saves the list of intervals that end with the index in 'key'.
+ using RecipeList = SmallVector<VPRecipeBase *, 2>;
+ SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;
+
+ // Transpose the EndPoints to a list of values that end at each index.
+ for (auto &Interval : EndPoint)
+ TransposeEnds[Interval.second].push_back(Interval.first);
+
+ SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
+ SmallVector<LoopVectorizationCostModel::RegisterUsage, 8> RUs(VFs.size());
+ SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+
+ LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+
+ const auto &TTICapture = TTI;
+ auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
+ if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
+ (VF.isScalable() &&
+ !TTICapture.isElementTypeLegalForScalableVector(Ty)))
+ return 0;
+ return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
+ };
+
+ for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
+ VPRecipeBase *R = Idx2Recipe[Idx];
+
+ // Remove all of the recipes that end at this location.
+ RecipeList &List = TransposeEnds[Idx];
+ for (VPRecipeBase *ToRemove : List)
+ OpenIntervals.erase(ToRemove);
+
+ // Ignore recipes that are never used within the loop.
+ if (!Ends.count(R) && !R->mayHaveSideEffects())
+ continue;
----------------
fhahn wrote:
I think we are missing code here to also skip ephemeral values, which the legacy version handled properly. Still need to figure out the best way to fix this.
https://github.com/llvm/llvm-project/pull/126437
More information about the llvm-commits
mailing list