[llvm] [LV] Move VPlan-based calculateRegisterUsage to VPlanAnalysis (NFC). (PR #135673)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 13:34:55 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/135673
Move the VPlan-based calculateRegisterUsage from LoopVectorize.cpp
to VPlanAnalysis.cpp. It is a VPlan-based analysis, and moving it helps
to reduce the size of LoopVectorize.cpp.
Probably best to land this after https://github.com/llvm/llvm-project/pull/132190, when LoopVectorizationCostModel::RegisterUsage can be removed.
From aba5a3f9ac72538baf76a64f7dad92975d36bb02 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 14 Apr 2025 22:20:50 +0200
Subject: [PATCH] [LV] Move VPlan-based calculateRegisterUsage to VPlanAnalysis
(NFC).
---
.../Transforms/Vectorize/LoopVectorize.cpp | 248 +-----------------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 245 +++++++++++++++++
llvm/lib/Transforms/Vectorize/VPlanAnalysis.h | 23 ++
.../LoopVectorize/AArch64/i1-reg-usage.ll | 2 +-
.../LoopVectorize/AArch64/reg-usage.ll | 2 +-
.../LoopVectorize/LoongArch/reg-usage.ll | 4 +-
.../LoopVectorize/PowerPC/reg-usage.ll | 4 +-
.../LoopVectorize/RISCV/reg-usage-bf16.ll | 2 +-
.../LoopVectorize/RISCV/reg-usage-f16.ll | 4 +-
.../LoopVectorize/RISCV/reg-usage.ll | 10 +-
.../RISCV/riscv-vector-reverse.ll | 2 +-
.../LoopVectorize/X86/i1-reg-usage.ll | 2 +-
.../LoopVectorize/X86/reg-usage-debug.ll | 2 +-
.../Transforms/LoopVectorize/X86/reg-usage.ll | 4 +-
14 files changed, 289 insertions(+), 265 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a28cda9fe62b3..00c3b2fb68450 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4863,250 +4863,6 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
}
-/// Get the VF scaling factor applied to the recipe's output, if the recipe has
-/// one.
-static unsigned getVFScaleFactor(VPRecipeBase *R) {
- if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
- return RR->getVFScaleFactor();
- if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
- return RR->getVFScaleFactor();
- return 1;
-}
-
-/// Estimate the register usage for \p Plan and vectorization factors in \p VFs
-/// by calculating the highest number of values that are live at a single
-/// location as a rough estimate. Returns the register usage for each VF in \p
-/// VFs.
-static SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
-calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
- const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
- // Each 'key' in the map opens a new interval. The values
- // of the map are the index of the 'last seen' usage of the
- // recipe that is the key.
- using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;
-
- // Maps indices to recipes.
- SmallVector<VPRecipeBase *, 64> Idx2Recipe;
- // Marks the end of each interval.
- IntervalMap EndPoint;
- // Saves the list of recipe indices that are used in the loop.
- SmallPtrSet<VPRecipeBase *, 8> Ends;
- // Saves the list of values that are used in the loop but are defined outside
- // the loop (not including non-recipe values such as arguments and
- // constants).
- SmallSetVector<VPValue *, 8> LoopInvariants;
- LoopInvariants.insert(&Plan.getVectorTripCount());
-
- // We scan the loop in a topological order in order and assign a number to
- // each recipe. We use RPO to ensure that defs are met before their users. We
- // assume that each recipe that has in-loop users starts an interval. We
- // record every time that an in-loop value is used, so we have a list of the
- // first and last occurrences of each recipe.
- ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
- Plan.getVectorLoopRegion());
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- if (!VPBB->getParent())
- break;
- for (VPRecipeBase &R : *VPBB) {
- Idx2Recipe.push_back(&R);
-
- // Save the end location of each USE.
- for (VPValue *U : R.operands()) {
- auto *DefR = U->getDefiningRecipe();
-
- // Ignore non-recipe values such as arguments, constants, etc.
- // FIXME: Might need some motivation why these values are ignored. If
- // for example an argument is used inside the loop it will increase the
- // register pressure (so shouldn't we add it to LoopInvariants).
- if (!DefR && (!U->getLiveInIRValue() ||
- !isa<Instruction>(U->getLiveInIRValue())))
- continue;
-
- // If this recipe is outside the loop then record it and continue.
- if (!DefR) {
- LoopInvariants.insert(U);
- continue;
- }
-
- // Overwrite previous end points.
- EndPoint[DefR] = Idx2Recipe.size();
- Ends.insert(DefR);
- }
- }
- if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
- // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
- // exiting block, where their increment will get materialized eventually.
- for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- EndPoint[&R] = Idx2Recipe.size();
- Ends.insert(&R);
- }
- }
- }
- }
-
- // Saves the list of intervals that end with the index in 'key'.
- using RecipeList = SmallVector<VPRecipeBase *, 2>;
- SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;
-
- // Next, we transpose the EndPoints into a multi map that holds the list of
- // intervals that *end* at a specific location.
- for (auto &Interval : EndPoint)
- TransposeEnds[Interval.second].push_back(Interval.first);
-
- SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
- SmallVector<LoopVectorizationCostModel::RegisterUsage, 8> RUs(VFs.size());
- SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
-
- LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
-
- VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
-
- const auto &TTICapture = TTI;
- auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
- if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
- (VF.isScalable() &&
- !TTICapture.isElementTypeLegalForScalableVector(Ty)))
- return 0;
- return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
- };
-
- // We scan the instructions linearly and record each time that a new interval
- // starts, by placing it in a set. If we find this value in TransposEnds then
- // we remove it from the set. The max register usage is the maximum register
- // usage of the recipes of the set.
- for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
- VPRecipeBase *R = Idx2Recipe[Idx];
-
- // Remove all of the recipes that end at this location.
- RecipeList &List = TransposeEnds[Idx];
- for (VPRecipeBase *ToRemove : List)
- OpenIntervals.erase(ToRemove);
-
- // Ignore recipes that are never used within the loop and do not have side
- // effects.
- if (!Ends.count(R) && !R->mayHaveSideEffects())
- continue;
-
- // Skip recipes for ignored values.
- // TODO: Should mark recipes for ephemeral values that cannot be removed
- // explictly in VPlan.
- if (isa<VPSingleDefRecipe>(R) &&
- ValuesToIgnore.contains(
- cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
- continue;
-
- // For each VF find the maximum usage of registers.
- for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
- // Count the number of registers used, per register class, given all open
- // intervals.
- // Note that elements in this SmallMapVector will be default constructed
- // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
- // there is no previous entry for ClassID.
- SmallMapVector<unsigned, unsigned, 4> RegUsage;
-
- for (auto *R : OpenIntervals) {
- // Skip recipes that weren't present in the original loop.
- // TODO: Remove after removing the legacy
- // LoopVectorizationCostModel::calculateRegisterUsage
- if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
- VPBranchOnMaskRecipe>(R))
- continue;
-
- if (VFs[J].isScalar() ||
- isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
- VPScalarIVStepsRecipe>(R) ||
- (isa<VPInstruction>(R) &&
- all_of(cast<VPSingleDefRecipe>(R)->users(), [&](VPUser *U) {
- return cast<VPRecipeBase>(U)->usesScalars(R->getVPSingleValue());
- }))) {
- unsigned ClassID = TTI.getRegisterClassForType(
- false, TypeInfo.inferScalarType(R->getVPSingleValue()));
- // FIXME: The target might use more than one register for the type
- // even in the scalar case.
- RegUsage[ClassID] += 1;
- } else {
- // The output from scaled phis and scaled reductions actually has
- // fewer lanes than the VF.
- unsigned ScaleFactor = getVFScaleFactor(R);
- ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
- LLVM_DEBUG(if (VF != VFs[J]) {
- dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
- << " for " << *R << "\n";
- });
-
- for (VPValue *DefV : R->definedValues()) {
- Type *ScalarTy = TypeInfo.inferScalarType(DefV);
- unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
- RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
- }
- }
- }
-
- for (const auto &Pair : RegUsage) {
- auto &Entry = MaxUsages[J][Pair.first];
- Entry = std::max(Entry, Pair.second);
- }
- }
-
- LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
- << OpenIntervals.size() << '\n');
-
- // Add the current recipe to the list of open intervals.
- OpenIntervals.insert(R);
- }
-
- // We also search for instructions that are defined outside the loop, but are
- // used inside the loop. We need this number separately from the max-interval
- // usage number because when we unroll, loop-invariant values do not take
- // more register.
- LoopVectorizationCostModel::RegisterUsage RU;
- for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
- // Note that elements in this SmallMapVector will be default constructed
- // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
- // there is no previous entry for ClassID.
- SmallMapVector<unsigned, unsigned, 4> Invariant;
-
- for (auto *In : LoopInvariants) {
- // FIXME: The target might use more than one register for the type
- // even in the scalar case.
- bool IsScalar = all_of(In->users(), [&](VPUser *U) {
- return cast<VPRecipeBase>(U)->usesScalars(In);
- });
-
- ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
- unsigned ClassID = TTI.getRegisterClassForType(
- VF.isVector(), TypeInfo.inferScalarType(In));
- Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
- }
-
- LLVM_DEBUG({
- dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
- dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
- << " item\n";
- for (const auto &pair : MaxUsages[Idx]) {
- dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(pair.first) << ", " << pair.second
- << " registers\n";
- }
- dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
- << " item\n";
- for (const auto &pair : Invariant) {
- dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(pair.first) << ", " << pair.second
- << " registers\n";
- }
- });
-
- RU.LoopInvariantRegs = Invariant;
- RU.MaxLocalUsers = MaxUsages[Idx];
- RUs[Idx] = RU;
- }
-
- return RUs;
-}
-
unsigned
LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
InstructionCost LoopCost) {
@@ -5158,8 +4914,8 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
return 1;
}
- RegisterUsage R =
- ::calculateRegisterUsage(Plan, {VF}, TTI, ValuesToIgnore)[0];
+ VPRegisterUsage R =
+ calculateRegisterUsageForVPlan(Plan, {VF}, TTI, ValuesToIgnore)[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
for (auto &Pair : R.MaxLocalUsers) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 375d4c9787994..d6ed432b53ea5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -10,8 +10,10 @@
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
@@ -362,3 +364,246 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
#endif
return Base::properlyDominates(ParentA, ParentB);
}
+
+/// Get the VF scaling factor applied to the recipe's output, if the recipe has
+/// one.
+static unsigned getVFScaleFactor(VPRecipeBase *R) {
+ if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
+ return RR->getVFScaleFactor();
+ if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
+ return RR->getVFScaleFactor();
+ return 1;
+}
+
+/// Estimate the register usage for \p Plan and vectorization factors in \p VFs
+/// by calculating the highest number of values that are live at a single
+/// location as a rough estimate. Returns the register usage for each VF in \p
+/// VFs.
+SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForVPlan(
+ VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
+ // Each 'key' in the map opens a new interval. The values
+ // of the map are the index of the 'last seen' usage of the
+ // recipe that is the key.
+ using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;
+
+ // Maps indices to recipes.
+ SmallVector<VPRecipeBase *, 64> Idx2Recipe;
+ // Marks the end of each interval.
+ IntervalMap EndPoint;
+ // Saves the set of recipes that are used in the loop.
+ SmallPtrSet<VPRecipeBase *, 8> Ends;
+ // Saves the list of values that are used in the loop but are defined outside
+ // the loop (not including non-recipe values such as arguments and
+ // constants).
+ SmallSetVector<VPValue *, 8> LoopInvariants;
+ LoopInvariants.insert(&Plan.getVectorTripCount());
+
+ // We scan the loop in topological order and assign a number to
+ // each recipe. We use RPO to ensure that defs are met before their users. We
+ // assume that each recipe that has in-loop users starts an interval. We
+ // record every time that an in-loop value is used, so we have a list of the
+ // first and last occurrences of each recipe.
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getVectorLoopRegion());
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ if (!VPBB->getParent())
+ break;
+ for (VPRecipeBase &R : *VPBB) {
+ Idx2Recipe.push_back(&R);
+
+ // Save the end location of each USE.
+ for (VPValue *U : R.operands()) {
+ auto *DefR = U->getDefiningRecipe();
+
+ // Ignore non-recipe values such as arguments, constants, etc.
+ // FIXME: Might need some motivation why these values are ignored. If
+ // for example an argument is used inside the loop it will increase the
+ // register pressure (so shouldn't we add it to LoopInvariants).
+ if (!DefR && (!U->getLiveInIRValue() ||
+ !isa<Instruction>(U->getLiveInIRValue())))
+ continue;
+
+ // No defining recipe: record the value as loop-invariant and continue.
+ if (!DefR) {
+ LoopInvariants.insert(U);
+ continue;
+ }
+
+ // Overwrite previous end points.
+ EndPoint[DefR] = Idx2Recipe.size();
+ Ends.insert(DefR);
+ }
+ }
+ if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
+ // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
+ // exiting block, where their increment will get materialized eventually.
+ for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ EndPoint[&R] = Idx2Recipe.size();
+ Ends.insert(&R);
+ }
+ }
+ }
+ }
+
+ // Saves the list of intervals that end with the index in 'key'.
+ using RecipeList = SmallVector<VPRecipeBase *, 2>;
+ SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;
+
+ // Next, we transpose the EndPoints into a multi map that holds the list of
+ // intervals that *end* at a specific location.
+ for (auto &Interval : EndPoint)
+ TransposeEnds[Interval.second].push_back(Interval.first);
+
+ SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
+ SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
+ SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+
+ LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
+
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+
+ const auto &TTICapture = TTI;
+ auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
+ if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
+ (VF.isScalable() &&
+ !TTICapture.isElementTypeLegalForScalableVector(Ty)))
+ return 0;
+ return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
+ };
+
+ // We scan the instructions linearly and record each time that a new interval
+ // starts, by placing it in a set. If we find this value in TransposeEnds then
+ // we remove it from the set. The max register usage is the maximum register
+ // usage of the recipes of the set.
+ for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
+ VPRecipeBase *R = Idx2Recipe[Idx];
+
+ // Remove all of the recipes that end at this location.
+ RecipeList &List = TransposeEnds[Idx];
+ for (VPRecipeBase *ToRemove : List)
+ OpenIntervals.erase(ToRemove);
+
+ // Ignore recipes that are never used within the loop and do not have side
+ // effects.
+ if (!Ends.count(R) && !R->mayHaveSideEffects())
+ continue;
+
+ // Skip recipes for ignored values.
+ // TODO: Should mark recipes for ephemeral values that cannot be removed
+ // explicitly in VPlan.
+ if (isa<VPSingleDefRecipe>(R) &&
+ ValuesToIgnore.contains(
+ cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
+ continue;
+
+ // For each VF find the maximum usage of registers.
+ for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
+ // Count the number of registers used, per register class, given all open
+ // intervals.
+ // Note that elements in this SmallMapVector will be default constructed
+ // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
+ // there is no previous entry for ClassID.
+ SmallMapVector<unsigned, unsigned, 4> RegUsage;
+
+ for (auto *R : OpenIntervals) {
+ // Skip recipes that weren't present in the original loop.
+ // TODO: Remove after removing the legacy
+ // LoopVectorizationCostModel::calculateRegisterUsage
+ if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
+ VPBranchOnMaskRecipe>(R))
+ continue;
+
+ if (VFs[J].isScalar() ||
+ isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
+ VPScalarIVStepsRecipe>(R) ||
+ (isa<VPInstruction>(R) &&
+ all_of(cast<VPSingleDefRecipe>(R)->users(), [&](VPUser *U) {
+ return cast<VPRecipeBase>(U)->usesScalars(R->getVPSingleValue());
+ }))) {
+ unsigned ClassID = TTI.getRegisterClassForType(
+ false, TypeInfo.inferScalarType(R->getVPSingleValue()));
+ // FIXME: The target might use more than one register for the type
+ // even in the scalar case.
+ RegUsage[ClassID] += 1;
+ } else {
+ // The output from scaled phis and scaled reductions actually has
+ // fewer lanes than the VF.
+ unsigned ScaleFactor = getVFScaleFactor(R);
+ ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
+ LLVM_DEBUG(if (VF != VFs[J]) {
+ dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
+ << " for " << *R << "\n";
+ });
+
+ for (VPValue *DefV : R->definedValues()) {
+ Type *ScalarTy = TypeInfo.inferScalarType(DefV);
+ unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
+ RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
+ }
+ }
+ }
+
+ for (const auto &Pair : RegUsage) {
+ auto &Entry = MaxUsages[J][Pair.first];
+ Entry = std::max(Entry, Pair.second);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
+ << OpenIntervals.size() << '\n');
+
+ // Add the current recipe to the list of open intervals.
+ OpenIntervals.insert(R);
+ }
+
+ // We also search for instructions that are defined outside the loop, but are
+ // used inside the loop. We need this number separately from the max-interval
+ // usage number because when we unroll, loop-invariant values do not take
+ // more registers.
+ VPRegisterUsage RU;
+ for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
+ // Note that elements in this SmallMapVector will be default constructed
+ // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
+ // there is no previous entry for ClassID.
+ SmallMapVector<unsigned, unsigned, 4> Invariant;
+
+ for (auto *In : LoopInvariants) {
+ // FIXME: The target might use more than one register for the type
+ // even in the scalar case.
+ bool IsScalar = all_of(In->users(), [&](VPUser *U) {
+ return cast<VPRecipeBase>(U)->usesScalars(In);
+ });
+
+ ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
+ unsigned ClassID = TTI.getRegisterClassForType(
+ VF.isVector(), TypeInfo.inferScalarType(In));
+ Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
+ dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
+ << " item\n";
+ for (const auto &pair : MaxUsages[Idx]) {
+ dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first) << ", " << pair.second
+ << " registers\n";
+ }
+ dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
+ << " item\n";
+ for (const auto &pair : Invariant) {
+ dbgs() << "LV(REG): RegisterClass: "
+ << TTI.getRegisterClassName(pair.first) << ", " << pair.second
+ << " registers\n";
+ }
+ });
+
+ RU.LoopInvariantRegs = Invariant;
+ RU.MaxLocalUsers = MaxUsages[Idx];
+ RUs[Idx] = RU;
+ }
+
+ return RUs;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index cc21870bee2e3..f71eadbf9baa2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/IR/Type.h"
namespace llvm {
@@ -27,6 +28,8 @@ struct VPWidenSelectRecipe;
class VPReplicateRecipe;
class VPRecipeBase;
class VPlan;
+class Value;
+class TargetTransformInfo;
class Type;
/// An analysis for type-inference for VPValues.
@@ -68,6 +71,26 @@ class VPTypeAnalysis {
// Collect a VPlan's ephemeral recipes (those used only by an assume).
void collectEphemeralRecipesForVPlan(VPlan &Plan,
DenseSet<VPRecipeBase *> &EphRecipes);
+
+/// A struct that represents some properties of the register usage
+/// of a loop.
+struct VPRegisterUsage {
+ /// Holds the number of loop invariant values that are used in the loop.
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
+ /// Holds the maximum number of concurrent live intervals in the loop.
+ /// The key is ClassID of target-provided register class.
+ SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
+};
+
+/// Estimate the register usage for \p Plan and vectorization factors in \p VFs
+/// by calculating the highest number of values that are live at a single
+/// location as a rough estimate. Returns the register usage for each VF in \p
+/// VFs.
+SmallVector<VPRegisterUsage, 8> calculateRegisterUsageForVPlan(
+ VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &ValuesToIgnore);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANANALYSIS_H
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll
index 0ec90b75002cd..8c0fc6104e9aa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -prefer-predicate-over-epilogue=scalar-epilogue 2>&1 < %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize,vplan -disable-output -prefer-predicate-over-epilogue=scalar-epilogue 2>&1 < %s | FileCheck %s
; REQUIRES: asserts
target triple = "aarch64"
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
index 2e6efec297d61..5699783f89732 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
-; RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize -disable-output <%s 2>&1 | FileCheck %s
+; RUN: opt -mtriple arm64-linux -passes=loop-vectorize -mattr=+sve -debug-only=loop-vectorize,vplan -disable-output <%s 2>&1 | FileCheck %s
; Invariant register usage calculation should take into account if the
; invariant would be used in widened instructions. Only in such cases, a vector
diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll
index 5baf1e013a50f..de49337c185ac 100644
--- a/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/LoongArch/reg-usage.ll
@@ -1,9 +1,9 @@
; REQUIRES: asserts
; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
-; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=1 \
+; RUN: --mattr=+lsx -debug-only=loop-vectorize,vplan --force-vector-width=1 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
; RUN: opt --passes=loop-vectorize --mtriple loongarch64-linux-gnu \
-; RUN: --mattr=+lsx -debug-only=loop-vectorize --force-vector-width=4 \
+; RUN: --mattr=+lsx -debug-only=loop-vectorize,vplan --force-vector-width=4 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VECTOR
define void @bar(ptr %A, i32 signext %n) {
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
index 280b3af04a4db..2c2a60ecc47b9 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
-; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
+; RUN: opt < %s -debug-only=loop-vectorize,vplan -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
+; RUN: opt < %s -debug-only=loop-vectorize,vplan -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; REQUIRES: asserts
@a = global [1024 x i8] zeroinitializer, align 16
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
index 4e3077cfcab67..af4c34a7efd96 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
index 8825065aa5fe8..04d21d8d64078 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 9585d0d6d6cfd..78bcc978982ca 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -1,22 +1,22 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
+; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
; RUN: -riscv-v-vector-bits-min=128 -force-vector-width=1 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
+; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
+; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
+; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v,+d -debug-only=loop-vectorize --disable-output \
+; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
; RUN: -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 3683cfaa578f9..540ab8d9d094f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -7,7 +7,7 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple riscv64-linux-gnu \
-; RUN: -mattr=+v -debug-only=loop-vectorize -scalable-vectorization=on \
+; RUN: -mattr=+v -debug-only=loop-vectorize,vplan -scalable-vectorization=on \
; RUN: -riscv-v-vector-bits-min=128 -disable-output < %s 2>&1 | FileCheck %s
define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll
index 3445d4ceff5ec..84a48dba6ae4b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/i1-reg-usage.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize,vplan -disable-output 2>&1 < %s | FileCheck %s
; REQUIRES: asserts
target triple = "x86_64"
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
index 164188db6ccf9..530ff175c81ca 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -debug-only=loop-vectorize -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
+; RUN: opt < %s -debug-only=loop-vectorize,vplan -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
; REQUIRES: asserts
; Test that the register usage estimation is not affected by the presence of
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
index 784b030bf3ab3..0d2488a2cd855 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -debug-only=loop-vectorize -passes=loop-vectorize -vectorizer-maximize-bandwidth -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
-; RUN: opt < %s -debug-only=loop-vectorize -passes=loop-vectorize -vectorizer-maximize-bandwidth -mtriple=x86_64-unknown-linux -mattr=+avx512f -S 2>&1 | FileCheck %s --check-prefix=AVX512F
+; RUN: opt < %s -debug-only=loop-vectorize,vplan -passes=loop-vectorize -vectorizer-maximize-bandwidth -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
+; RUN: opt < %s -debug-only=loop-vectorize,vplan -passes=loop-vectorize -vectorizer-maximize-bandwidth -mtriple=x86_64-unknown-linux -mattr=+avx512f -S 2>&1 | FileCheck %s --check-prefix=AVX512F
; REQUIRES: asserts
@a = global [1024 x i8] zeroinitializer, align 16
More information about the llvm-commits
mailing list