[llvm-branch-commits] [llvm] [LV] capture branch weights for constant trip counts (PR #175096)

Mircea Trofin via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jan 15 13:59:44 PST 2026


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/175096

>From dfe68e4705cacf6c5f10a4d5f169067818749a43 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Thu, 8 Jan 2026 15:02:18 -0800
Subject: [PATCH] capture weights

---
 llvm/lib/Transforms/Utils/ProfileVerify.cpp   |  2 +-
 .../Transforms/Vectorize/LoopVectorize.cpp    |  2 ++
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 36 +++++++++++++++----
 .../Transforms/LoopVectorize/tripcount.ll     | 15 ++++----
 4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
index 69e03f01245db..9c34f3a1017a5 100644
--- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp
+++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp
@@ -31,7 +31,7 @@ static cl::opt<int64_t>
     DefaultFunctionEntryCount("profcheck-default-function-entry-count",
                               cl::init(1000));
 static cl::opt<bool>
-    AnnotateSelect("profcheck-annotate-select", cl::init(true),
+    AnnotateSelect("profcheck-annotate-select", cl::init(false),
                    cl::desc("Also inject (if missing) and verify MD_prof for "
                             "`select` instructions"));
 static cl::opt<bool>
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3f1e12e5d1cd0..a55c5e1f40491 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7526,6 +7526,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   unsigned OrigLoopInvocationWeight = 0;
   std::optional<unsigned> OrigAverageTripCount =
       getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
+  if (!OrigLoopInvocationWeight)
+    OrigLoopInvocationWeight = SE.getSmallConstantTripCount(OrigLoop);
 
   BestVPlan.execute(&State);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a6a46e36b397d..590b82660d90d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -52,6 +52,10 @@
 using namespace llvm;
 using namespace llvm::VPlanPatternMatch;
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 /// @{
 /// Metadata attribute names
 const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
@@ -1692,17 +1696,35 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
   // For scalable vectorization we can't know at compile time how many
   // iterations of the loop are handled in one vector iteration, so instead
   // use the value of vscale used for tuning.
-  if (!OrigAverageTripCount)
-    return;
-  // Calculate number of iterations in unrolled loop.
-  unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
-  // Calculate number of iterations for remainder loop.
-  unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
-
+  unsigned AverageVectorTripCount = 0;
+  unsigned RemainderAverageTripCount = 0;
+
+  if (!OrigAverageTripCount) {
+    if (auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
+        !EC || !EC->getCount())
+      return;
+    auto &SE = *PSE.getSE();
+    AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
+    if (Plan.getScalarPreheader()->hasPredecessors())
+      RemainderAverageTripCount =
+          SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
+    if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
+      return;
+  } else {
+    // Calculate number of iterations in unrolled loop.
+    AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
+    // Calculate number of iterations for remainder loop.
+    RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
+  }
   if (HeaderVPBB) {
     setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
                               OrigLoopInvocationWeight);
   }
+  // When the remainder loop is reachable its header executes at least once, so
+  // clamp its estimated trip count to at least 1 (unless fixes are disabled).
+  RemainderAverageTripCount = ProfcheckDisableMetadataFixes
+                                  ? RemainderAverageTripCount
+                                  : std::max(1U, RemainderAverageTripCount);
   if (Plan.getScalarPreheader()->hasPredecessors()) {
     setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
                               OrigLoopInvocationWeight);
diff --git a/llvm/test/Transforms/LoopVectorize/tripcount.ll b/llvm/test/Transforms/LoopVectorize/tripcount.ll
index 5af09c9399f90..54abe08134aa1 100644
--- a/llvm/test/Transforms/LoopVectorize/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/tripcount.ll
@@ -328,9 +328,10 @@ for.end:                                          ; preds = %for.body
   ret i32 0
 }
 
-define i32 @const_trip_over_profile() {
+define i32 @const_trip_over_profile() !prof !0 {
 ; constant trip count takes precedence over profile data
-; CHECK-LABEL: define i32 @const_trip_over_profile() {
+; CHECK-LABEL: define i32 @const_trip_over_profile(
+; CHECK-SAME: ) !prof [[PROF1]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -344,7 +345,7 @@ define i32 @const_trip_over_profile() {
 ; CHECK-NEXT:    store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -358,7 +359,7 @@ define i32 @const_trip_over_profile() {
 ; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF0]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
 ; CHECK-NEXT:    ret i32 0
 ;
@@ -401,6 +402,8 @@ for.end:                                          ; preds = %for.body
 ; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1}
 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]}
 ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]}
-; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META6]], [[META7]]}
-; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META7]], [[META6]]}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 1001, i32 249249}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]}
+; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1001}
+; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]}
 ;.



More information about the llvm-branch-commits mailing list