[llvm] [ctxprof] Scale up everything under a root by its `TotalRootEntryCount` (PR #136015)

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 19:41:22 PDT 2025


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/136015

>From 5848ae8c2933716535df434071a621480b9aca5c Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 16 Apr 2025 13:11:04 -0700
Subject: [PATCH] [ctxprof] Scale up everything under a root by its
 `TotalRootEntryCount`

---
 llvm/lib/Analysis/CtxProfAnalysis.cpp         | 58 +++++++++++--------
 .../CtxProfAnalysis/flatten-and-annotate.ll   | 26 ++++-----
 .../CtxProfAnalysis/flatten-check-path.ll     |  4 +-
 .../flatten-insert-icp-mdprof.ll              |  2 +-
 .../CtxProfAnalysis/flatten-zero-path.ll      |  2 +-
 .../Analysis/CtxProfAnalysis/full-cycle.ll    |  8 +--
 llvm/test/Analysis/CtxProfAnalysis/inline.ll  |  8 ++-
 llvm/test/Analysis/CtxProfAnalysis/load.ll    |  8 +--
 8 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 391631e15aa89..7f1f3233bee0d 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -621,17 +621,23 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
   return nullptr;
 }
 
-template <class ProfilesTy, class ProfTy>
-static void preorderVisit(ProfilesTy &Profiles,
-                          function_ref<void(ProfTy &)> Visitor) {
+template <class ProfTy>
+static void preorderVisitOneRoot(ProfTy &Profile,
+                                 function_ref<void(ProfTy &)> Visitor) {
   std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
     Visitor(Ctx);
     for (auto &[_, SubCtxSet] : Ctx.callsites())
       for (auto &[__, Subctx] : SubCtxSet)
         Traverser(Subctx);
   };
+  Traverser(Profile);
+}
+
+template <class ProfilesTy, class ProfTy>
+static void preorderVisit(ProfilesTy &Profiles,
+                          function_ref<void(ProfTy &)> Visitor) {
   for (auto &[_, P] : Profiles)
-    Traverser(P);
+    preorderVisitOneRoot<ProfTy>(P, Visitor);
 }
 
 void PGOContextualProfile::initIndex() {
@@ -683,40 +689,46 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
 const CtxProfFlatProfile PGOContextualProfile::flatten() const {
   CtxProfFlatProfile Flat;
   auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
-                        const SmallVectorImpl<uint64_t> &From) {
+                        const SmallVectorImpl<uint64_t> &From,
+                        uint64_t SamplingRate) {
     if (Into.empty())
       Into.resize(From.size());
     assert(Into.size() == From.size() &&
            "All contexts corresponding to a function should have the exact "
            "same number of counters.");
     for (size_t I = 0, E = Into.size(); I < E; ++I)
-      Into[I] += From[I];
+      Into[I] += From[I] * SamplingRate;
   };
 
-  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
-                const PGOCtxProfContext>(
-      Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
-        Accummulate(Flat[Ctx.guid()], Ctx.counters());
-      });
-  for (const auto &[_, RC] : Profiles.Contexts)
-    for (const auto &[G, Unh] : RC.getUnhandled())
-      Accummulate(Flat[G], Unh);
+  for (const auto &[_, CtxRoot] : Profiles.Contexts) {
+    const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
+    preorderVisitOneRoot<const PGOCtxProfContext>(
+        CtxRoot, [&](const PGOCtxProfContext &Ctx) {
+          Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
+        });
+
+    for (const auto &[G, Unh] : CtxRoot.getUnhandled())
+      Accummulate(Flat[G], Unh, SamplingFactor);
+  }
   for (const auto &[G, FC] : Profiles.FlatProfiles)
-    Accummulate(Flat[G], FC);
+    Accummulate(Flat[G], FC, /*SamplingRate=*/1);
   return Flat;
 }
 
 const CtxProfFlatIndirectCallProfile
 PGOContextualProfile::flattenVirtCalls() const {
   CtxProfFlatIndirectCallProfile Ret;
-  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
-                const PGOCtxProfContext>(
-      Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
-        auto &Targets = Ret[Ctx.guid()];
-        for (const auto &[ID, SubctxSet] : Ctx.callsites())
-          for (const auto &Subctx : SubctxSet)
-            Targets[ID][Subctx.first] += Subctx.second.getEntrycount();
-      });
+  for (const auto &[_, CtxRoot] : Profiles.Contexts) {
+    const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
+    preorderVisitOneRoot<const PGOCtxProfContext>(
+        CtxRoot, [&](const PGOCtxProfContext &Ctx) {
+          auto &Targets = Ret[Ctx.guid()];
+          for (const auto &[ID, SubctxSet] : Ctx.callsites())
+            for (const auto &Subctx : SubctxSet)
+              Targets[ID][Subctx.first] +=
+                  Subctx.second.getEntrycount() * TotalRootEntryCount;
+        });
+  }
   return Ret;
 }
 
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
index 6daf4f5020043..d91f44047e739 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -23,13 +23,13 @@
 ; PRELINK-LABEL: yes:
 ; PRELINK-NEXT:    call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
 ; PRELINK:      ![[#]] = !{i32 1, !"ProfileSummary", !1}
-; PRELINK:      ![[#]] = !{!"TotalCount", i64 3595}
-; PRELINK:      ![[#]] = !{!"MaxCount", i64 3000}
-; PRELINK:      ![[#]] = !{!"MaxInternalCount", i64 3000}
-; PRELINK:      ![[#]] = !{!"MaxFunctionCount", i64 300}
+; PRELINK:      ![[#]] = !{!"TotalCount", i64 151600}
+; PRELINK:      ![[#]] = !{!"MaxCount", i64 102000}
+; PRELINK:      ![[#]] = !{!"MaxInternalCount", i64 102000}
+; PRELINK:      ![[#]] = !{!"MaxFunctionCount", i64 20100}
 ; PRELINK:      ![[#]] = !{!"NumCounts", i64 6}
 ; PRELINK:      ![[#]] = !{!"NumFunctions", i64 3}
-; PRELINK:       ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}
+; PRELINK:       ![[PREPROF]] = !{!"branch_weights", i32 4000, i32 6000}
 
 ; Check that the output has:
 ;  - no instrumentation
@@ -49,10 +49,10 @@
 ; The postlink summary is restricted to the stuff under the root - including the
 ; "unhandled" data.
 ; POSTLINK:      ![[#]] = !{i32 1, !"ProfileSummary", !1}
-; POSTLINK:      ![[#]] = !{!"TotalCount", i64 1495}
-; POSTLINK:      ![[#]] = !{!"MaxCount", i64 1000}
-; POSTLINK:      ![[#]] = !{!"MaxInternalCount", i64 1000}
-; POSTLINK:      ![[#]] = !{!"MaxFunctionCount", i64 200}
+; POSTLINK:      ![[#]] = !{!"TotalCount", i64 149500}
+; POSTLINK:      ![[#]] = !{!"MaxCount", i64 100000}
+; POSTLINK:      ![[#]] = !{!"MaxInternalCount", i64 100000}
+; POSTLINK:      ![[#]] = !{!"MaxFunctionCount", i64 20000}
 ; POSTLINK:      ![[#]] = !{!"NumCounts", i64 6}
 ; POSTLINK:      ![[#]] = !{!"NumFunctions", i64 3}
 
@@ -60,14 +60,14 @@
 ; @foo will be called both unconditionally and conditionally, on the "yes" branch
 ; which has a count of 40. So 140 times.
 
-; POSTLINK:       ![[FOO_EP]] = !{!"function_entry_count", i64 140} 
+; POSTLINK:       ![[FOO_EP]] = !{!"function_entry_count", i64 14000} 
 
 ; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
 ; Which means its "yes" branch is taken 140 - 15 times.
 
-; POSTLINK:       ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15} 
-; POSTLINK:       ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
-; POSTLINK:       ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60} 
+; POSTLINK:       ![[FOO_BW]] = !{!"branch_weights", i32 12500, i32 1500} 
+; POSTLINK:       ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 10000}
+; POSTLINK:       ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 4000, i32 6000} 
 
 ;--- profile.yaml
 Contexts:
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
index bf672998c1e39..d69768d295907 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
@@ -13,8 +13,8 @@
 
 ; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
 ; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]
-; CHECK: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1}
-; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1}
+; CHECK: ![[PROF1]] = !{!"branch_weights", i32 2, i32 2}
+; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 2}
 ; ASSERTION: Assertion `allTakenPathsExit()
 
 ; b1->exit is the only way out from b1, but the exit block would have been
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
index 13beddc05c7a2..2943dce43e01f 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -11,7 +11,7 @@
 ; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
 ; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
 ; PRELINK-NEXT: call void @bar(){{$}}
-; PRELINK:      ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+; PRELINK:      ![[VPPROF]] = !{!"VP", i32 0, i64 25, i64 5678, i64 20, i64 5555, i64 5}
 
 ; RUN: cp %t/example.ll %t/1234.ll
 ; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
index 558f385b4bb9d..3d349119cfd0c 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: yes:
 ; CHECK:          br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
 ; CHECK-NOT:   !prof
-; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
+; CHECK: ![[C1]] = !{!"branch_weights", i32 72, i32 0}
 
 ;--- 1234.ll
 define void @f1(i32 %cond) !guid !0 {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
index 63abdd892bffb..8cadcae1654c9 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -108,7 +108,7 @@ Contexts:
                 Counters:        [ 1, 2 ]
 
 Flat Profile:
-2072045998141807037 : 7 
-3087265239403591524 : 11 9 
-4197650231481825559 : 2 
-10507721908651011566 : 1 
+2072045998141807037 : 70 
+3087265239403591524 : 110 90 
+4197650231481825559 : 20 
+10507721908651011566 : 10 
diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
index a069acee1c943..8238bc241d180 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
@@ -44,11 +44,13 @@
 ; PIPELINE-LABEL: loop:
 ; PIPELINE:         br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]
 
-; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10}
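+; NOTE: every expected value below is the raw-counter value scaled by the root's TotalRootEntryCount, which is 24 (e.g. 10 * 24 = 240); the comments quote the unscaled counters.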
+;
+; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240}
 ; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop)
-; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2}
+; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48}
 ; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop)
-; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8}
+; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192}
 
 ;--- 1000.ll
 define i32 @entrypoint(i32 %x) !guid !0 {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index bd21a4b710630..92e7fdc40e229 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -60,10 +60,10 @@ Contexts:
     Counters:        [ 5 ]
 
 Flat Profile:
-12341 : 9 
-728453322856651412 : 6 7 
-11872291593386833696 : 1 
-12074870348631550642 : 5 
+12341 : 810 
+728453322856651412 : 24 28 
+11872291593386833696 : 4 
+12074870348631550642 : 120 
 ;--- example.ll
 declare void @bar()
 



More information about the llvm-commits mailing list