[llvm-branch-commits] [llvm] [LV] capture branch weights for constant trip counts (PR #175096)
Mircea Trofin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 14 11:09:48 PST 2026
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/175096
>From bff26f20f8c122fd35f37fc3be5bf1e06cdac1c5 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Thu, 8 Jan 2026 15:02:18 -0800
Subject: [PATCH] capture weights
---
.../Transforms/Vectorize/LoopVectorize.cpp | 2 ++
llvm/lib/Transforms/Vectorize/VPlan.cpp | 31 ++++++++++++++-----
.../Transforms/LoopVectorize/tripcount.ll | 15 +++++----
3 files changed, 35 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f77e35038b84e..9f07578ff143d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7514,6 +7514,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
unsigned OrigLoopInvocationWeight = 0;
std::optional<unsigned> OrigAverageTripCount =
getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
+ if (!OrigLoopInvocationWeight)
+ OrigLoopInvocationWeight = SE.getSmallConstantTripCount(OrigLoop);
BestVPlan.execute(&State);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a6a46e36b397d..edb65a7d2b97a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -52,6 +52,10 @@
using namespace llvm;
using namespace llvm::VPlanPatternMatch;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
/// @{
/// Metadata attribute names
const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
@@ -1692,13 +1696,26 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
// For scalable vectorization we can't know at compile time how many
// iterations of the loop are handled in one vector iteration, so instead
// use the value of vscale used for tuning.
- if (!OrigAverageTripCount)
- return;
- // Calculate number of iterations in unrolled loop.
- unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
- // Calculate number of iterations for remainder loop.
- unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
-
+ unsigned AverageVectorTripCount = 0;
+ unsigned RemainderAverageTripCount = 0;
+
+ if (!OrigAverageTripCount) {
+ if (auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
+ !EC || !EC->getCount())
+ return;
+ auto &SE = *PSE.getSE();
+ AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
+ if (Plan.getScalarPreheader()->hasPredecessors())
+ RemainderAverageTripCount =
+ SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
+ if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
+ return;
+ } else {
+ // Calculate number of iterations in unrolled loop.
+ AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
+ // Calculate number of iterations for remainder loop.
+ RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
+ }
if (HeaderVPBB) {
setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
OrigLoopInvocationWeight);
diff --git a/llvm/test/Transforms/LoopVectorize/tripcount.ll b/llvm/test/Transforms/LoopVectorize/tripcount.ll
index 5af09c9399f90..54abe08134aa1 100644
--- a/llvm/test/Transforms/LoopVectorize/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/tripcount.ll
@@ -328,9 +328,10 @@ for.end: ; preds = %for.body
ret i32 0
}
-define i32 @const_trip_over_profile() {
+define i32 @const_trip_over_profile() !prof !0 {
; constant trip count takes precedence over profile data
-; CHECK-LABEL: define i32 @const_trip_over_profile() {
+; CHECK-LABEL: define i32 @const_trip_over_profile(
+; CHECK-SAME: ) !prof [[PROF1]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -344,7 +345,7 @@ define i32 @const_trip_over_profile() {
; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK: [[SCALAR_PH]]:
@@ -358,7 +359,7 @@ define i32 @const_trip_over_profile() {
; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF0]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i32 0
;
@@ -401,6 +402,8 @@ for.end: ; preds = %for.body
; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]}
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]}
-; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META6]], [[META7]]}
-; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META7]], [[META6]]}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 1001, i32 249249}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]}
+; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1001}
+; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]}
;.
More information about the llvm-branch-commits mailing list