[llvm-branch-commits] [llvm] [llvm-profgen] Support [buildid:]addr in unsymbolized profile context (PR #194205)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Apr 26 22:56:26 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Amir Ayupov (aaupov)
<details>
<summary>Changes</summary>
Extend createCtxVectorFromStr with an optional FilterBuildID parameter
to handle [buildid:] prefixed address frames in unsymbolized profiles.
This enables a single multi-DSO unsymbolized profile file where profgen
is invoked N times with --filter-build-id=X against the same file.
Context format: [buildid:0xaddr1 @ buildid:0xaddr2 @ ...]
Ranges/branches remain plain hex — they inherit the DSO from the leaf
context frame.
createCtxVectorFromStr (SampleProf.h):
- Accept optional FilterBuildID parameter
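- Strip the `buildid:` prefix from frames whose buildid matches the filter; replace frames with a non-matching buildid with 0x0
- Return bool: true if the leaf frame's buildid matches (the leaf is internal) or no filter is set, false otherwise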
readUnsymbolizedProfile (PerfReader.cpp):
- Pass BinaryBuildID to createCtxVectorFromStr
- Skip entire block (context + ranges + branches) when leaf is external
Test Plan:
filter-build-id-unsymbolized.test verifies:
- Matching buildid produces correct profile
- With a given filter buildid, only contexts whose leaf frame carries that buildid are processed; all other contexts are skipped
- Backward compatibility with non-buildid unsymbolized profiles
---
Full diff: https://github.com/llvm/llvm-project/pull/194205.diff
4 Files Affected:
- (modified) llvm/include/llvm/ProfileData/SampleProf.h (+20-3)
- (added) llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw (+24)
- (added) llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test (+25)
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+10-1)
``````````diff
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 14243688d075a..39278d87d1da8 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -599,22 +599,39 @@ class SampleContext {
}
/// Create a context vector from a given context string and save it in
- /// `Context`.
- static void createCtxVectorFromStr(StringRef ContextStr,
- SampleContextFrameVector &Context) {
+ /// `Context`. If \p FilterBuildID is non-empty, strip matching
+ /// `buildid:` prefixes; non-matching frames become 0x0.
+ /// Returns true if the leaf (first) frame's buildid matches or no
+ /// filter is set, false if the leaf is external (non-matching buildid).
+ static bool createCtxVectorFromStr(StringRef ContextStr,
+ SampleContextFrameVector &Context,
+ StringRef FilterBuildID = StringRef()) {
// Remove encapsulating '[' and ']' if any
ContextStr = ContextStr.substr(1, ContextStr.size() - 2);
StringRef ContextRemain = ContextStr;
StringRef ChildContext;
FunctionId Callee;
+ bool LeafIsInternal = true;
+ bool IsLeaf = true;
while (!ContextRemain.empty()) {
auto ContextSplit = ContextRemain.split(" @ ");
ChildContext = ContextSplit.first;
ContextRemain = ContextSplit.second;
+ // Strip [buildid:] prefix. On mismatch, the frame address is zero.
+ if (!FilterBuildID.empty()) {
+ auto ColonSplit = ChildContext.split(':');
+ bool Matches = ColonSplit.first == FilterBuildID;
+ ChildContext = Matches ? ColonSplit.second : StringRef("0x0");
+ if (IsLeaf) {
+ LeafIsInternal = Matches;
+ IsLeaf = false;
+ }
+ }
LineLocation CallSiteLoc(0, 0);
decodeContextString(ChildContext, Callee, CallSiteLoc);
Context.emplace_back(Callee, CallSiteLoc);
}
+ return LeafIsInternal;
}
// Decode context string for a frame to get function name and location.
diff --git a/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw b/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw
new file mode 100644
index 0000000000000..56aa2e9ae3491
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw
@@ -0,0 +1,24 @@
+[aabb1122:0x4005dc @ aabb1122:0x400634]
+ 3
+ 5ff-62f:6
+ 634-637:6
+ 645-645:6
+ 3
+ 62f->5b0:6
+ 637->645:6
+ 645->5ff:6
+[aabb1122:0x4005dc @ aabb1122:0x400634 @ aabb1122:0x400684]
+ 4
+ 5b0-5c8:2
+ 5b0-5d7:4
+ 5dc-5e9:2
+ 5e5-5e9:4
+ 3
+ 5c8->5dc:4
+ 5d7->5e5:4
+ 5e9->634:6
+[deadbeef:0x700100 @ deadbeef:0x700200]
+ 1
+ 100-200:5
+ 1
+ 100->200:5
diff --git a/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test b/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test
new file mode 100644
index 0000000000000..ab067b819a5a1
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test
@@ -0,0 +1,25 @@
+; REQUIRES: x86_64-linux
+; Test buildid filtering in unsymbolized profile format.
+; Context frames carry [buildid:] prefix, ranges/branches are plain hex.
+
+;; Test 1: Matching buildid - only aabb1122 contexts are processed.
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/buildid-unsymbolized.raw --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t1 --filter-build-id=aabb1122 --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-MATCH
+
+; CHECK-MATCH: 0x4005dc:202:0
+; CHECK-MATCH: 0: 0x400634:202
+; CHECK-MATCH: 0: 0x400684:100
+
+;; Test 2: Non-matching buildid - aabb1122 contexts are skipped.
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/buildid-unsymbolized.raw --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --filter-build-id=ccdd3344 --profile-summary-cold-count=0 2>&1 | FileCheck %s --check-prefix=CHECK-NOMATCH
+; CHECK-NOMATCH: warning:
+
+;; Test 3: Backward compatibility - unsymbolized profile without buildid
+;; prefixes works unchanged when no --filter-build-id is specified.
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t.unsym --skip-symbolization --profile-summary-cold-count=0
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.unsym --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t3 --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-COMPAT
+
+; CHECK-COMPAT: main:202:0
+; CHECK-COMPAT: 1: foo:202
+; CHECK-COMPAT: 3: bar:100
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 785946188b0b6..0b05a0efbb783 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1025,16 +1025,25 @@ void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
TraceStream TraceIt(FileName);
+ StringRef BinaryBuildID = getFilterBuildID(Binary);
while (!TraceIt.isAtEoF()) {
std::shared_ptr<StringBasedCtxKey> Key =
std::make_shared<StringBasedCtxKey>();
StringRef Line = TraceIt.getCurrentLine();
// Read context stack for CS profile.
+ // Context frames may have optional [buildid:] prefix.
if (Line.starts_with("[")) {
ProfileIsCS = true;
auto I = ContextStrSet.insert(Line.str());
- SampleContext::createCtxVectorFromStr(*I.first, Key->Context);
+ bool LeafIsInternal = SampleContext::createCtxVectorFromStr(
+ *I.first, Key->Context, BinaryBuildID);
TraceIt.advance();
+ if (!LeafIsInternal) {
+ // Skip ranges and branches for non-matching leaf buildid.
+ while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with("["))
+ TraceIt.advance();
+ continue;
+ }
}
auto Ret =
SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());
``````````
</details>
https://github.com/llvm/llvm-project/pull/194205
More information about the llvm-branch-commits mailing list