[llvm] [LV] Use vscale for tuning to improve branch weight estimates (PR #144733)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 27 05:18:53 PDT 2025
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/144733
From 2077d7922bceb29916c57102c544772f89250b57 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 18 Jun 2025 15:48:22 +0000
Subject: [PATCH 1/2] [LV] Use vscale for tuning to improve branch weight
estimates
In addBranchWeightToMiddleTerminator we attempt to add branch
weights to the middle block terminator. We pessimistically
assume vscale=1, whereas we can improve the estimate by using
the value of vscale used for tuning.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 ++++--
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 ++++--
llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 3 ++-
.../Transforms/LoopVectorize/AArch64/check-prof-info.ll | 2 +-
4 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 41d554651ef08..ddb5e334bcca2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7326,9 +7326,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
- if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
+ if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+ std::optional<unsigned> VScale = CM.getVScaleForTuning();
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
- BestVPlan, BestVF);
+ BestVPlan, BestVF, VScale);
+ }
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::narrowInterleaveGroups(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 87bb79c769b87..b34c1fbeae7fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3328,8 +3328,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
/// BranchOnCond recipe.
-void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
- ElementCount VF) {
+void VPlanTransforms::addBranchWeightToMiddleTerminator(
+ VPlan &Plan, ElementCount VF, std::optional<unsigned> VScale) {
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
auto *MiddleTerm =
dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
@@ -3341,6 +3341,8 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(VPlan &Plan,
"must have a BranchOnCond");
// Assume that `TripCount % VectorStep ` is equally distributed.
unsigned VectorStep = Plan.getUF() * VF.getKnownMinValue();
+ if (VF.isScalable() && VScale.has_value())
+ VectorStep *= *VScale;
assert(VectorStep > 0 && "trip count should not be zero");
MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
MDNode *BranchWeights =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 40885cd52a127..a814dbf3cd64a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -238,7 +238,8 @@ struct VPlanTransforms {
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
/// a BranchOnCond recipe.
- static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF);
+ static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
+ std::optional<unsigned> VScale);
};
} // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
index 9435c544fc812..1f619898ea788 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll
@@ -92,7 +92,7 @@ for.cond.cleanup: ; preds = %for.body
; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 3}
+; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7}
; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0}
; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]}
;.
From 00ed355082eb063bcaa6f1915ca68f6a3034e3d6 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Fri, 27 Jun 2025 12:15:23 +0000
Subject: [PATCH 2/2] Fix review comments
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++---
llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 5 +++--
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b34c1fbeae7fd..dcad635f765e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3329,7 +3329,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
/// BranchOnCond recipe.
void VPlanTransforms::addBranchWeightToMiddleTerminator(
- VPlan &Plan, ElementCount VF, std::optional<unsigned> VScale) {
+ VPlan &Plan, ElementCount VF, std::optional<unsigned> VScaleForTuning) {
VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
auto *MiddleTerm =
dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator());
@@ -3341,8 +3341,8 @@ void VPlanTransforms::addBranchWeightToMiddleTerminator(
"must have a BranchOnCond");
// Assume that `TripCount % VectorStep ` is equally distributed.
unsigned VectorStep = Plan.getUF() * VF.getKnownMinValue();
- if (VF.isScalable() && VScale.has_value())
- VectorStep *= *VScale;
+ if (VF.isScalable() && VScaleForTuning.has_value())
+ VectorStep *= *VScaleForTuning;
assert(VectorStep > 0 && "trip count should not be zero");
MDBuilder MDB(Plan.getScalarHeader()->getIRBasicBlock()->getContext());
MDNode *BranchWeights =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index a814dbf3cd64a..8d2eded45da22 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -238,8 +238,9 @@ struct VPlanTransforms {
/// Add branch weight metadata, if the \p Plan's middle block is terminated by
/// a BranchOnCond recipe.
- static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
- std::optional<unsigned> VScale);
+ static void
+ addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF,
+ std::optional<unsigned> VScaleForTuning);
};
} // namespace llvm
More information about the llvm-commits
mailing list