[llvm] [llvm-profgen] Add branch/target validation (PR #188620)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 25 14:53:43 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: HighW4y2H3ll

<details>
<summary>Changes</summary>

Add extra branch source and target validation checks for LBR samples. This is to check whether there are branch source samples that do not match a call/branch/ret instruction in the binary, and branch target samples that do not match a resolved Imm target address, or a function start address (in case of an indirect call).

Example output:
```
# X86
warning: 0.01% of sampled target addresses (27/376876) do not match the binary, likely due to problematic raw samples or mismatch in binary.
# AArch64
warning: 1.52% of sampled target addresses (4366/287204) do not match the binary, likely due to problematic raw samples or mismatch in binary.
```

---
Full diff: https://github.com/llvm/llvm-project/pull/188620.diff


4 Files Affected:

- (modified) llvm/tools/llvm-profgen/PerfReader.cpp (+55) 
- (modified) llvm/tools/llvm-profgen/PerfReader.h (+2) 
- (modified) llvm/tools/llvm-profgen/ProfiledBinary.cpp (+13) 
- (modified) llvm/tools/llvm-profgen/ProfiledBinary.h (+10) 


``````````diff
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 1dc59321fd91f..56d91044f8441 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1344,6 +1344,60 @@ void PerfScriptReader::warnInvalidRange() {
       "range end acrossing the unconditinal jmp.");
 }
 
+void PerfScriptReader::warnIfBranchTargetMismatch() {
+  // Cross-check LBR samples against the disassembled binary: gather the unique
+  // branch sources and targets, then warn about the share that has no match.
+
+  std::unordered_set<uint64_t> SampleBranches;
+  std::unordered_set<uint64_t> SampleIndirectTargets;
+  std::unordered_set<uint64_t> SampleTargets;
+
+  for (const auto &Item : AggregatedSamples) {
+    const PerfSample *Sample = Item.first.getPtr();
+    for (const LBREntry &LBR : Sample->LBRStack) {
+      // Skip entries marked with ExternalAddr on either end.
+      if (LBR.Source == ExternalAddr || LBR.Target == ExternalAddr)
+        continue;
+      SampleBranches.insert(LBR.Source);
+      // Indirect-branch targets are validated separately below.
+      if (Binary->addressIsIndirectBranch(LBR.Source))
+        SampleIndirectTargets.insert(LBR.Target);
+      else
+        SampleTargets.insert(LBR.Target);
+    }
+  }
+
+  // Report the share of SampleAddrs that IsValidAddr rejects; emit nothing
+  // when every address matches or the set is empty.
+  auto CheckMismatch = [&](StringRef Kind,
+                           const std::unordered_set<uint64_t> &SampleAddrs,
+                           auto IsValidAddr) {
+    uint64_t Mismatched = 0;
+    for (uint64_t Addr : SampleAddrs)
+      if (!IsValidAddr(Addr))
+        ++Mismatched;
+    if (!Mismatched)
+      return;
+    double MismatchPct =
+        static_cast<double>(Mismatched) / SampleAddrs.size() * 100;
+    WithColor::warning()
+        << format("%.2f", MismatchPct) << "% of sampled " << Kind
+        << " addresses (" << Mismatched << "/" << SampleAddrs.size()
+        << ") do not match the binary, likely due to problematic raw samples "
+           "or mismatch in binary.\n";
+  };
+
+  CheckMismatch("branch", SampleBranches, [&](uint64_t Addr) {
+    return Binary->addressIsTransfer(Addr);
+  });
+  CheckMismatch("target", SampleTargets, [&](uint64_t Addr) {
+    return Binary->addressIsBranchTarget(Addr) ||
+           Binary->findFuncRangeForStartAddr(Addr);
+  });
+  CheckMismatch("indirect branch target", SampleIndirectTargets,
+                [&](uint64_t Addr) { return Binary->addressIsCode(Addr); });
+}
+
 void PerfScriptReader::parsePerfTraces() {
   // Parse perf traces and do aggregation.
   parseAndAggregateTrace();
@@ -1360,6 +1414,7 @@ void PerfScriptReader::parsePerfTraces() {
   // Generate unsymbolized profile.
   warnTruncatedStack();
   warnInvalidRange();
+  warnIfBranchTargetMismatch();
   generateUnsymbolizedProfile();
   AggregatedSamples.clear();
 
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 6e233d17f8e62..1ee6369de649a 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -650,6 +650,8 @@ class PerfScriptReader : public PerfReaderBase {
   void warnTruncatedStack();
   // Warn if range is invalid.
   void warnInvalidRange();
+  // Warn if sampled branch/target addresses don't match the binary.
+  void warnIfBranchTargetMismatch();
   // Extract call stack from the perf trace lines
   bool extractCallstack(TraceStream &TraceIt,
                         SmallVectorImpl<uint64_t> &CallStack);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 915e991e4068c..6a4ab7d6a854b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -628,6 +628,8 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       if (MCDesc.isCall()) {
         CallAddressSet.insert(Address);
         UncondBranchAddrSet.insert(Address);
+        // Record the instruction after call as the branch target of a ret
+        BranchTargetAddressSet.insert(Address + Size);
       } else if (MCDesc.isReturn()) {
         RetAddressSet.insert(Address);
         UncondBranchAddrSet.insert(Address);
@@ -637,6 +639,17 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
         BranchAddressSet.insert(Address);
       }
 
+      if (MCDesc.isIndirectBranch()) {
+        IndirectBranchAddressSet.insert(Address);
+      }
+
+      // Record branch target addresses for branches and calls.
+      if (MCDesc.isCall() || MCDesc.isBranch()) {
+        uint64_t Target = 0;
+        if (MIA->evaluateBranch(Inst, Address, Size, Target))
+          BranchTargetAddressSet.insert(Target);
+      }
+
       // Record potential call targets for tail frame inference later-on.
       if (InferMissingFrames && FRange) {
         uint64_t Target = 0;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index e907c4a8a1647..f72c2ba44f5e2 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -282,6 +282,10 @@ class ProfiledBinary {
   std::set<uint64_t> UncondBranchAddrSet;
   // A set of branch instruction addresses.
   std::unordered_set<uint64_t> BranchAddressSet;
+  // A set of indirect branch instruction addresses.
+  std::unordered_set<uint64_t> IndirectBranchAddressSet;
+  // A set of branch target addresses (destinations of branches/calls).
+  std::unordered_set<uint64_t> BranchTargetAddressSet;
 
   // Estimate and track function prolog and epilog ranges.
   PrologEpilogTracker ProEpilogTracker;
@@ -465,6 +469,12 @@ class ProfiledBinary {
     return ProEpilogTracker.PrologEpilogSet.count(Address);
   }
 
+  bool addressIsBranchTarget(uint64_t Address) const {
+    return BranchTargetAddressSet.count(Address) != 0;
+  }
+  bool addressIsIndirectBranch(uint64_t Address) const {
+    return IndirectBranchAddressSet.count(Address) != 0;
+  }
   bool addressIsTransfer(uint64_t Address) {
     return BranchAddressSet.count(Address) || RetAddressSet.count(Address) ||
            CallAddressSet.count(Address);

``````````

</details>


https://github.com/llvm/llvm-project/pull/188620


More information about the llvm-commits mailing list