[llvm-branch-commits] [llvm] [LV] capture branch weights for constant trip counts (PR #175096)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jan 13 16:53:30 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Mircea Trofin (mtrofin)

<details>
<summary>Changes</summary>

When a vectorized loop has a constant trip count, it's important to update the profile information accordingly, because hotness analysis will only look at profile info. For example, in the `const_trip_over_profile` function of the `tripcount.ll` test, without producing the profile info, the BFI of `vector.body` would be 32 (this is the expected value when synthetic branch weights are used in loops). The real value is 250. The `for.body` value was _very_ incorrect before, too (and detrimentally so, as it would have appeared as "very hot" when it wasn't):

The BFI values below were obtained by printing BFI in the RUN: command, i.e. `build/bin/opt < llvm/test/Transforms/LoopVectorize/tripcount.ll -passes="loop-vectorize,print<block-freq>" -loop-vectorize-with-block-frequency -S -o /dev/null`. Only the `float` value is shown, i.e. the BFI relative to the function entry BB.

```

Printing analysis results of BFI for function 'const_trip_over_profile':  
block-frequency-info: const_trip_over_profile

```

| Block | Before | After |
| ----- | ------ | ----- |
| entry | float = 1.0 | float = 1.0 |
| vector.ph | float = 1.0 | float = 1.0 |
| vector.body | float = **32.0** | float = **250.0** |
| middle.block | float = 1.0 | float = 1.0 |
| scalar.ph | float = 1.0 | float = 1.0 |
| for.body | float = **2147483647.8** | float = **1.0** |
| for.end | float = 1.0 | float = 1.0 |


---
Full diff: https://github.com/llvm/llvm-project/pull/175096.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+24-7) 
- (modified) llvm/test/Transforms/LoopVectorize/tripcount.ll (+9-6) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f77e35038b84e..9f07578ff143d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7514,6 +7514,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   unsigned OrigLoopInvocationWeight = 0;
   std::optional<unsigned> OrigAverageTripCount =
       getLoopEstimatedTripCount(OrigLoop, &OrigLoopInvocationWeight);
+  if (!OrigLoopInvocationWeight)
+    OrigLoopInvocationWeight = SE.getSmallConstantTripCount(OrigLoop);
 
   BestVPlan.execute(&State);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index a6a46e36b397d..edb65a7d2b97a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -52,6 +52,10 @@
 using namespace llvm;
 using namespace llvm::VPlanPatternMatch;
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
 /// @{
 /// Metadata attribute names
 const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
@@ -1692,13 +1696,26 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo(
   // For scalable vectorization we can't know at compile time how many
   // iterations of the loop are handled in one vector iteration, so instead
   // use the value of vscale used for tuning.
-  if (!OrigAverageTripCount)
-    return;
-  // Calculate number of iterations in unrolled loop.
-  unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
-  // Calculate number of iterations for remainder loop.
-  unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
-
+  unsigned AverageVectorTripCount = 0;
+  unsigned RemainderAverageTripCount = 0;
+
+  if (!OrigAverageTripCount) {
+    if (auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
+        !EC || !EC->getCount())
+      return;
+    auto &SE = *PSE.getSE();
+    AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
+    if (Plan.getScalarPreheader()->hasPredecessors())
+      RemainderAverageTripCount =
+          SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
+    if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
+      return;
+  } else {
+    // Calculate number of iterations in unrolled loop.
+    AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
+    // Calculate number of iterations for remainder loop.
+    RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
+  }
   if (HeaderVPBB) {
     setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
                               OrigLoopInvocationWeight);
diff --git a/llvm/test/Transforms/LoopVectorize/tripcount.ll b/llvm/test/Transforms/LoopVectorize/tripcount.ll
index d647e9318defb..fcb8fdec24a29 100644
--- a/llvm/test/Transforms/LoopVectorize/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/tripcount.ll
@@ -321,9 +321,10 @@ for.end:                                          ; preds = %for.body
   ret i32 0
 }
 
-define i32 @const_trip_over_profile() {
+define i32 @const_trip_over_profile() !prof !0 {
 ; constant trip count takes precedence over profile data
-; CHECK-LABEL: define i32 @const_trip_over_profile() {
+; CHECK-LABEL: define i32 @const_trip_over_profile(
+; CHECK-SAME: ) !prof [[PROF1]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -337,7 +338,7 @@ define i32 @const_trip_over_profile() {
 ; CHECK-NEXT:    store <4 x i8> [[TMP2]], ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF15:![0-9]+]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -351,7 +352,7 @@ define i32 @const_trip_over_profile() {
 ; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i32 [[I_08]], 1000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF0]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]], !prof [[PROF17:![0-9]+]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
 ; CHECK-NEXT:    ret i32 0
 ;
@@ -401,6 +402,8 @@ for.end:                                          ; preds = %for.body
 ; CHECK: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 1}
 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META7]]}
 ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META7]], [[META6]]}
-; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META6]], [[META7]]}
-; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META7]], [[META6]]}
+; CHECK: [[PROF15]] = !{!"branch_weights", i32 1001, i32 249249}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META6]], [[META7]], [[META8]]}
+; CHECK: [[PROF17]] = !{!"branch_weights", i32 0, i32 1001}
+; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META7]], [[META6]], [[META12]]}
 ;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/175096


More information about the llvm-branch-commits mailing list