[llvm] [VPlan] Set branch weight metadata on middle term in VPlan (NFC) (PR #143035)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 01:53:29 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/143035
>From 69435ead74cb6ae972395333676f0137736c4a1e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 5 Jun 2025 20:39:44 +0100
Subject: [PATCH 1/4] [VPlan] Set branch weight metadata on middle term in
VPlan (NFC)
Manage branch weights for the BranchOnCond in the middle block in VPlan.
This requires updating VPInstruction to inherit from VPIRMetadata, which
in general makes sense as there are a number of opcodes that could take
metadata.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 45 +++++++++-------
llvm/lib/Transforms/Vectorize/VPlan.h | 53 ++++++++++---------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++-
3 files changed, 59 insertions(+), 45 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d236111836391..7c9bd82d73260 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7273,6 +7273,30 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock));
}
+/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
+/// BranchOnCond recipe.
+static void addBranchWeigthToMiddleTerminator(VPlan &Plan, ElementCount VF,
+ Loop *OrigLoop) {
+ // 4. Adjust branch weight of the branch in the middle block.
+ Instruction *LatchTerm = OrigLoop->getLoopLatch()->getTerminator();
+ if (!hasBranchWeightMD(*LatchTerm))
+ return;
+
+ VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
+ auto *MiddleTerm =
+ dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
+ if (!MiddleTerm)
+ return;
+
+ // Assume that `Count % VectorTripCount` is equally distributed.
+ unsigned TripCount = Plan.getUF() * VF.getKnownMinValue();
+ assert(TripCount > 0 && "trip count should not be zero");
+ MDBuilder MDB(LatchTerm->getContext());
+ MDNode *BranchWeights =
+ MDB.createBranchWeights({1, TripCount - 1}, /*IsExpected=*/false);
+ MiddleTerm->addMetadata(LLVMContext::MD_prof, BranchWeights);
+}
+
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) {
@@ -7295,11 +7319,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
*Legal->getWidestInductionType());
- // Retrieve and store the middle block before dissolving regions. Regions are
- // dissolved after optimizing for VF and UF, which completely removes unneeded
- // loop regions first.
- VPBasicBlock *MiddleVPBB =
- BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
+
+ addBranchWeigthToMiddleTerminator(BestVPlan, BestVF, OrigLoop);
VPlanTransforms::dissolveLoopRegions(BestVPlan);
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
@@ -7442,20 +7463,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ILV.printDebugTracesAtEnd();
- // 4. Adjust branch weight of the branch in the middle block.
- if (HeaderVPBB) {
- auto *MiddleTerm =
- cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
- if (MiddleTerm->isConditional() &&
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
- // Assume that `Count % VectorTripCount` is equally distributed.
- unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
- assert(TripCount > 0 && "trip count should not be zero");
- const uint32_t Weights[] = {1, TripCount - 1};
- setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
- }
- }
-
return ExpandedSCEVs;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index acc861b991975..468284168e9ca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,11 +882,39 @@ template <unsigned PartOpIdx> class VPUnrollPartAccessor {
unsigned getUnrollPart(VPUser &U) const;
};
+/// Helper to manage IR metadata for recipes. It filters out metadata that
+/// cannot be propagated.
+class VPIRMetadata {
+ SmallVector<std::pair<unsigned, MDNode *>> Metadata;
+
+public:
+ VPIRMetadata() {}
+
+ /// Adds metadata that can be preserved from the original instruction
+ /// \p I.
+ VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
+
+ /// Adds metadata that can be preserved from the original instruction
+ /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
+ VPIRMetadata(Instruction &I, LoopVersioning *LVer);
+
+ /// Copy constructor for cloning.
+ VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
+
+ /// Add all metadata to \p I.
+ void applyMetadata(Instruction &I) const;
+
+ void addMetadata(unsigned Kind, MDNode *Node) {
+ Metadata.emplace_back(Kind, Node);
+ }
+};
+
/// This is a concrete Recipe that models a single VPlan-level instruction.
/// While as any Recipe it may generate a sequence of IR instructions when
/// executed, these instructions would always form a single-def expression as
/// the VPInstruction is also a single def-use vertex.
class VPInstruction : public VPRecipeWithIRFlags,
+ public VPIRMetadata,
public VPUnrollPartAccessor<1> {
friend class VPlanSlp;
@@ -976,7 +1004,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL = {},
const Twine &Name = "")
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
- Opcode(Opcode), Name(Name.str()) {}
+ VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, DebugLoc DL = {},
@@ -1268,29 +1296,6 @@ struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors {
const VPRecipeBase *getAsRecipe() const override { return this; }
};
-/// Helper to manage IR metadata for recipes. It filters out metadata that
-/// cannot be propagated.
-class VPIRMetadata {
- SmallVector<std::pair<unsigned, MDNode *>> Metadata;
-
-public:
- VPIRMetadata() {}
-
- /// Adds metatadata that can be preserved from the original instruction
- /// \p I.
- VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
-
- /// Adds metatadata that can be preserved from the original instruction
- /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
- VPIRMetadata(Instruction &I, LoopVersioning *LVer);
-
- /// Copy constructor for cloning.
- VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
-
- /// Add all metadata to \p I.
- void applyMetadata(Instruction &I) const;
-};
-
/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 62b99d98a2b5e..f5a2533727b3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -410,7 +410,7 @@ VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, DebugLoc DL,
const Twine &Name)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
- Opcode(Opcode), Name(Name.str()) {
+ VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {
assert(flagsValidForOpcode(getOpcode()) &&
"Set flags not supported for the provided opcode");
}
@@ -591,7 +591,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::BranchOnCond: {
Value *Cond = State.get(getOperand(0), VPLane(0));
- return createCondBranch(Cond, getParent(), State);
+ auto *Br = createCondBranch(Cond, getParent(), State);
+ applyMetadata(*Br);
+ return Br;
}
case VPInstruction::BranchOnCount: {
// First create the compare.
>From a0a2306f34a86214f3650e25f7306ace9d9661d8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 10 Jun 2025 16:13:15 +0100
Subject: [PATCH 2/4] !fixup add comment re only adding to conditional
terminator.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7c9bd82d73260..64fdc8232d5cb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7285,6 +7285,7 @@ static void addBranchWeigthToMiddleTerminator(VPlan &Plan, ElementCount VF,
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
auto *MiddleTerm =
dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
+ // Only add branch metadata if there is a (conditional) terminator.
if (!MiddleTerm)
return;
>From 91c63c69d12bcfcaf9c732e103d01c01b8a412a0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 12 Jun 2025 09:44:03 +0100
Subject: [PATCH 3/4] !fixup fix typo
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 64fdc8232d5cb..86a61ad068678 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7275,7 +7275,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
/// BranchOnCond recipe.
-static void addBranchWeigthToMiddleTerminator(VPlan &Plan, ElementCount VF,
+static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
Loop *OrigLoop) {
// 4. Adjust branch weight of the branch in the middle block.
Instruction *LatchTerm = OrigLoop->getLoopLatch()->getTerminator();
@@ -7321,7 +7321,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
*Legal->getWidestInductionType());
- addBranchWeigthToMiddleTerminator(BestVPlan, BestVF, OrigLoop);
+ addBranchWeightToMiddleTerminator(BestVPlan, BestVF, OrigLoop);
VPlanTransforms::dissolveLoopRegions(BestVPlan);
// Perform the actual loop transformation.
VPTransformState State(&TTI, BestVF, LI, DT, ILV.AC, ILV.Builder, &BestVPlan,
>From 0317d1d800853239941b1548cdde45ca9b52f4f1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 12 Jun 2025 09:52:57 +0100
Subject: [PATCH 4/4] !fixup add assert for BranchOnCond.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 86a61ad068678..93ab3353a296a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7289,6 +7289,8 @@ static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
if (!MiddleTerm)
return;
+ assert(MiddleTerm->getOpcode() == VPInstruction::BranchOnCond &&
+ "must have a BranchOnCond");
// Assume that `Count % VectorTripCount` is equally distributed.
unsigned TripCount = Plan.getUF() * VF.getKnownMinValue();
assert(TripCount > 0 && "trip count should not be zero");
More information about the llvm-commits
mailing list