[llvm] [ctxprof] Scale up everything under a root by its `TotalRootEntryCount` (PR #136015)
Mircea Trofin via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 19:41:22 PDT 2025
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/136015
From 5848ae8c2933716535df434071a621480b9aca5c Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Wed, 16 Apr 2025 13:11:04 -0700
Subject: [PATCH] [ctxprof] Scale up everything under a root by its `TotalRootEntryCount`
---
llvm/lib/Analysis/CtxProfAnalysis.cpp | 58 +++++++++++--------
.../CtxProfAnalysis/flatten-and-annotate.ll | 26 ++++-----
.../CtxProfAnalysis/flatten-check-path.ll | 4 +-
.../flatten-insert-icp-mdprof.ll | 2 +-
.../CtxProfAnalysis/flatten-zero-path.ll | 2 +-
.../Analysis/CtxProfAnalysis/full-cycle.ll | 8 +--
llvm/test/Analysis/CtxProfAnalysis/inline.ll | 8 ++-
llvm/test/Analysis/CtxProfAnalysis/load.ll | 8 +--
8 files changed, 65 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 391631e15aa89..7f1f3233bee0d 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -621,17 +621,23 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
return nullptr;
}
-template <class ProfilesTy, class ProfTy>
-static void preorderVisit(ProfilesTy &Profiles,
- function_ref<void(ProfTy &)> Visitor) {
+template <class ProfTy>
+static void preorderVisitOneRoot(ProfTy &Profile,
+ function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
};
+ Traverser(Profile);
+}
+
+template <class ProfilesTy, class ProfTy>
+static void preorderVisit(ProfilesTy &Profiles,
+ function_ref<void(ProfTy &)> Visitor) {
for (auto &[_, P] : Profiles)
- Traverser(P);
+ preorderVisitOneRoot<ProfTy>(P, Visitor);
}
void PGOContextualProfile::initIndex() {
@@ -683,40 +689,46 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
CtxProfFlatProfile Flat;
auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
- const SmallVectorImpl<uint64_t> &From) {
+ const SmallVectorImpl<uint64_t> &From,
+ uint64_t SamplingRate) {
if (Into.empty())
Into.resize(From.size());
assert(Into.size() == From.size() &&
"All contexts corresponding to a function should have the exact "
"same number of counters.");
for (size_t I = 0, E = Into.size(); I < E; ++I)
- Into[I] += From[I];
+ Into[I] += From[I] * SamplingRate;
};
- preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
- const PGOCtxProfContext>(
- Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
- Accummulate(Flat[Ctx.guid()], Ctx.counters());
- });
- for (const auto &[_, RC] : Profiles.Contexts)
- for (const auto &[G, Unh] : RC.getUnhandled())
- Accummulate(Flat[G], Unh);
+ for (const auto &[_, CtxRoot] : Profiles.Contexts) {
+ const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
+ preorderVisitOneRoot<const PGOCtxProfContext>(
+ CtxRoot, [&](const PGOCtxProfContext &Ctx) {
+ Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
+ });
+
+ for (const auto &[G, Unh] : CtxRoot.getUnhandled())
+ Accummulate(Flat[G], Unh, SamplingFactor);
+ }
for (const auto &[G, FC] : Profiles.FlatProfiles)
- Accummulate(Flat[G], FC);
+ Accummulate(Flat[G], FC, /*SamplingRate=*/1);
return Flat;
}
const CtxProfFlatIndirectCallProfile
PGOContextualProfile::flattenVirtCalls() const {
CtxProfFlatIndirectCallProfile Ret;
- preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
- const PGOCtxProfContext>(
- Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
- auto &Targets = Ret[Ctx.guid()];
- for (const auto &[ID, SubctxSet] : Ctx.callsites())
- for (const auto &Subctx : SubctxSet)
- Targets[ID][Subctx.first] += Subctx.second.getEntrycount();
- });
+ for (const auto &[_, CtxRoot] : Profiles.Contexts) {
+ const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
+ preorderVisitOneRoot<const PGOCtxProfContext>(
+ CtxRoot, [&](const PGOCtxProfContext &Ctx) {
+ auto &Targets = Ret[Ctx.guid()];
+ for (const auto &[ID, SubctxSet] : Ctx.callsites())
+ for (const auto &Subctx : SubctxSet)
+ Targets[ID][Subctx.first] +=
+ Subctx.second.getEntrycount() * TotalRootEntryCount;
+ });
+ }
return Ret;
}
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
index 6daf4f5020043..d91f44047e739 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -23,13 +23,13 @@
; PRELINK-LABEL: yes:
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
-; PRELINK: ![[#]] = !{!"TotalCount", i64 3595}
-; PRELINK: ![[#]] = !{!"MaxCount", i64 3000}
-; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000}
-; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300}
+; PRELINK: ![[#]] = !{!"TotalCount", i64 151600}
+; PRELINK: ![[#]] = !{!"MaxCount", i64 102000}
+; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 102000}
+; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 20100}
; PRELINK: ![[#]] = !{!"NumCounts", i64 6}
; PRELINK: ![[#]] = !{!"NumFunctions", i64 3}
-; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}
+; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 4000, i32 6000}
; Check that the output has:
; - no instrumentation
@@ -49,10 +49,10 @@
; The postlink summary is restricted to the stuff under the root - including the
; "unhandled" data.
; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
-; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495}
-; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000}
-; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000}
-; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200}
+; POSTLINK: ![[#]] = !{!"TotalCount", i64 149500}
+; POSTLINK: ![[#]] = !{!"MaxCount", i64 100000}
+; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 100000}
+; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 20000}
; POSTLINK: ![[#]] = !{!"NumCounts", i64 6}
; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3}
@@ -60,14 +60,14 @@
; @foo will be called both unconditionally and conditionally, on the "yes" branch
; which has a count of 40. So 140 times.
-; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
+; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 14000}
; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
; Which means its "yes" branch is taken 140 - 15 times.
-; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
-; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
-; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
+; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 12500, i32 1500}
+; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 10000}
+; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 4000, i32 6000}
;--- profile.yaml
Contexts:
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
index bf672998c1e39..d69768d295907 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
@@ -13,8 +13,8 @@
; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]
-; CHECK: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1}
-; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1}
+; CHECK: ![[PROF1]] = !{!"branch_weights", i32 2, i32 2}
+; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 2}
; ASSERTION: Assertion `allTakenPathsExit()
; b1->exit is the only way out from b1, but the exit block would have been
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
index 13beddc05c7a2..2943dce43e01f 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -11,7 +11,7 @@
; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
; PRELINK-NEXT: call void @bar(){{$}}
-; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 25, i64 5678, i64 20, i64 5555, i64 5}
; RUN: cp %t/example.ll %t/1234.ll
; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
index 558f385b4bb9d..3d349119cfd0c 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
@@ -14,7 +14,7 @@
; CHECK-LABEL: yes:
; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
; CHECK-NOT: !prof
-; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
+; CHECK: ![[C1]] = !{!"branch_weights", i32 72, i32 0}
;--- 1234.ll
define void @f1(i32 %cond) !guid !0 {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
index 63abdd892bffb..8cadcae1654c9 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -108,7 +108,7 @@ Contexts:
Counters: [ 1, 2 ]
Flat Profile:
-2072045998141807037 : 7
-3087265239403591524 : 11 9
-4197650231481825559 : 2
-10507721908651011566 : 1
+2072045998141807037 : 70
+3087265239403591524 : 110 90
+4197650231481825559 : 20
+10507721908651011566 : 10
diff --git a/llvm/test/Analysis/CtxProfAnalysis/inline.ll b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
index a069acee1c943..8238bc241d180 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/inline.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/inline.ll
@@ -44,11 +44,13 @@
; PIPELINE-LABEL: loop:
; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]
-; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10}
+; NOTE! all values are multiplied by the TotalRootEntryCount, which is 24
+;
+; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240}
; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop)
-; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2}
+; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48}
; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop)
-; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8}
+; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192}
;--- 1000.ll
define i32 @entrypoint(i32 %x) !guid !0 {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index bd21a4b710630..92e7fdc40e229 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -60,10 +60,10 @@ Contexts:
Counters: [ 5 ]
Flat Profile:
-12341 : 9
-728453322856651412 : 6 7
-11872291593386833696 : 1
-12074870348631550642 : 5
+12341 : 810
+728453322856651412 : 24 28
+11872291593386833696 : 4
+12074870348631550642 : 120
;--- example.ll
declare void @bar()
More information about the llvm-commits mailing list