[llvm-branch-commits] [llvm] [llvm-profgen] Support [buildid:]addr in unsymbolized profile context (PR #194205)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Apr 26 22:56:26 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Amir Ayupov (aaupov)

<details>
<summary>Changes</summary>

Extend createCtxVectorFromStr with an optional FilterBuildID parameter
to handle address frames that carry a `buildid:` prefix in unsymbolized
profiles. This enables a single multi-DSO unsymbolized profile file: profgen
is invoked N times against the same file, each time with a different
--filter-build-id=X.

Context format: [buildid:0xaddr1 @ buildid:0xaddr2 @ ...]
Ranges/branches remain plain hex — they inherit the DSO from the leaf
context frame.

createCtxVectorFromStr (SampleProf.h):
- Accept optional FilterBuildID parameter
- Strip matching buildid prefix, replace non-matching with 0x0
- Return bool: true if the leaf frame's buildid matches (internal) or no
  filter is set; false if the leaf is external (non-matching buildid)

readUnsymbolizedProfile (PerfReader.cpp):
- Pass BinaryBuildID to createCtxVectorFromStr
- Skip entire block (context + ranges + branches) when leaf is external

Test Plan:
filter-build-id-unsymbolized.test verifies:
- Matching buildid produces correct profile
- Non-matching buildid skips the contexts that belong to other buildids
  (the test expects profgen to emit a warning)
- Backward compatibility with non-buildid unsymbolized profiles


---
Full diff: https://github.com/llvm/llvm-project/pull/194205.diff


4 Files Affected:

- (modified) llvm/include/llvm/ProfileData/SampleProf.h (+20-3) 
- (added) llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw (+24) 
- (added) llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test (+25) 
- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+10-1) 


``````````diff
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 14243688d075a..39278d87d1da8 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -599,22 +599,39 @@ class SampleContext {
   }
 
   /// Create a context vector from a given context string and save it in
-  /// `Context`.
-  static void createCtxVectorFromStr(StringRef ContextStr,
-                                     SampleContextFrameVector &Context) {
+  /// `Context`. If \p FilterBuildID is non-empty, strip matching
+  /// `buildid:` prefixes; non-matching frames become 0x0.
+  /// Returns true if the leaf (first) frame's buildid matches or no
+  /// filter is set, false if the leaf is external (non-matching buildid).
+  static bool createCtxVectorFromStr(StringRef ContextStr,
+                                     SampleContextFrameVector &Context,
+                                     StringRef FilterBuildID = StringRef()) {
     // Remove encapsulating '[' and ']' if any
     ContextStr = ContextStr.substr(1, ContextStr.size() - 2);
     StringRef ContextRemain = ContextStr;
     StringRef ChildContext;
     FunctionId Callee;
+    bool LeafIsInternal = true;
+    bool IsLeaf = true;
     while (!ContextRemain.empty()) {
       auto ContextSplit = ContextRemain.split(" @ ");
       ChildContext = ContextSplit.first;
       ContextRemain = ContextSplit.second;
+      // Strip [buildid:] prefix. On mismatch, the frame address is zero.
+      if (!FilterBuildID.empty()) {
+        auto ColonSplit = ChildContext.split(':');
+        bool Matches = ColonSplit.first == FilterBuildID;
+        ChildContext = Matches ? ColonSplit.second : StringRef("0x0");
+        if (IsLeaf) {
+          LeafIsInternal = Matches;
+          IsLeaf = false;
+        }
+      }
       LineLocation CallSiteLoc(0, 0);
       decodeContextString(ChildContext, Callee, CallSiteLoc);
       Context.emplace_back(Callee, CallSiteLoc);
     }
+    return LeafIsInternal;
   }
 
   // Decode context string for a frame to get function name and location.
diff --git a/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw b/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw
new file mode 100644
index 0000000000000..56aa2e9ae3491
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/Inputs/buildid-unsymbolized.raw
@@ -0,0 +1,24 @@
+[aabb1122:0x4005dc @ aabb1122:0x400634]
+  3
+  5ff-62f:6
+  634-637:6
+  645-645:6
+  3
+  62f->5b0:6
+  637->645:6
+  645->5ff:6
+[aabb1122:0x4005dc @ aabb1122:0x400634 @ aabb1122:0x400684]
+  4
+  5b0-5c8:2
+  5b0-5d7:4
+  5dc-5e9:2
+  5e5-5e9:4
+  3
+  5c8->5dc:4
+  5d7->5e5:4
+  5e9->634:6
+[deadbeef:0x700100 @ deadbeef:0x700200]
+  1
+  100-200:5
+  1
+  100->200:5
diff --git a/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test b/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test
new file mode 100644
index 0000000000000..ab067b819a5a1
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/filter-build-id-unsymbolized.test
@@ -0,0 +1,25 @@
+; REQUIRES: x86_64-linux
+; Test buildid filtering in unsymbolized profile format.
+; Context frames carry [buildid:] prefix, ranges/branches are plain hex.
+
+;; Test 1: Matching buildid - only aabb1122 contexts are processed.
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/buildid-unsymbolized.raw --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t1 --filter-build-id=aabb1122 --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-MATCH
+
+; CHECK-MATCH: 0x4005dc:202:0
+; CHECK-MATCH:  0: 0x400634:202
+; CHECK-MATCH:   0: 0x400684:100
+
+;; Test 2: Non-matching buildid - aabb1122 contexts are skipped.
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/buildid-unsymbolized.raw --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --filter-build-id=ccdd3344 --profile-summary-cold-count=0 2>&1 | FileCheck %s --check-prefix=CHECK-NOMATCH
+; CHECK-NOMATCH: warning:
+
+;; Test 3: Backward compatibility - unsymbolized profile without buildid
+;; prefixes works unchanged when no --filter-build-id is specified.
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t.unsym --skip-symbolization --profile-summary-cold-count=0
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.unsym --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t3 --profile-summary-cold-count=0
+; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-COMPAT
+
+; CHECK-COMPAT: main:202:0
+; CHECK-COMPAT:  1: foo:202
+; CHECK-COMPAT:   3: bar:100
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 785946188b0b6..0b05a0efbb783 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1025,16 +1025,25 @@ void UnsymbolizedProfileReader::readSampleCounters(TraceStream &TraceIt,
 
 void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) {
   TraceStream TraceIt(FileName);
+  StringRef BinaryBuildID = getFilterBuildID(Binary);
   while (!TraceIt.isAtEoF()) {
     std::shared_ptr<StringBasedCtxKey> Key =
         std::make_shared<StringBasedCtxKey>();
     StringRef Line = TraceIt.getCurrentLine();
     // Read context stack for CS profile.
+    // Context frames may have optional [buildid:] prefix.
     if (Line.starts_with("[")) {
       ProfileIsCS = true;
       auto I = ContextStrSet.insert(Line.str());
-      SampleContext::createCtxVectorFromStr(*I.first, Key->Context);
+      bool LeafIsInternal = SampleContext::createCtxVectorFromStr(
+          *I.first, Key->Context, BinaryBuildID);
       TraceIt.advance();
+      if (!LeafIsInternal) {
+        // Skip ranges and branches for non-matching leaf buildid.
+        while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().starts_with("["))
+          TraceIt.advance();
+        continue;
+      }
     }
     auto Ret =
         SampleCounters.emplace(Hashable<ContextKey>(Key), SampleCounter());

``````````

</details>


https://github.com/llvm/llvm-project/pull/194205


More information about the llvm-branch-commits mailing list