[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 15 16:07:20 PDT 2025


https://github.com/HighW4y2H3ll updated https://github.com/llvm/llvm-project/pull/163654

>From f34ab2d0d5f767d46f31452a8231e56f67ed4a21 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 9 Oct 2025 12:07:13 -0700
Subject: [PATCH] [llvm-profgen] Loading binary functions from .symtab when
 DWARF info is incomplete

---
 llvm/include/llvm/ProfileData/SampleProf.h   | 10 ++++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 ++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   | 44 ++++++++++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.h     |  3 ++
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 3dd34aba2d716..4adbe13b6712b 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,12 +1214,18 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+    SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+    return getCanonicalFnName(FnName, KnownSuffixes, Attr);
+  }
+
+  static StringRef getCanonicalFnName(StringRef FnName,
+                                      const SmallVector<StringRef> &Suffixes,
+                                      StringRef Attr = "selected") {
     if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
     if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const auto &Suf : KnownSuffixes) {
+      for (const auto &Suf : Suffixes) {
         StringRef Suffix(Suf);
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..058b154fc5a57 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,29 +449,56 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   // Go through all the stacks, ranges and branches in sample counters, use
   // the start of the range to look up the function it belongs and record the
   // function.
+  uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
+  uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
   for (const auto &CI : *SampleCounters) {
     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
       for (auto StackAddr : CtxKey->Context) {
+        uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+        TotalStkAddr += inc;
         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
           ProfiledFunctions.insert(FRange->Func);
+        else
+          ErrStkAddr += inc;
       }
     }
 
     for (auto Item : CI.second.RangeCounter) {
       uint64_t StartAddress = Item.first.first;
+      uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
+      TotalFuncRange += inc;
       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrFuncRange += inc;
     }
 
     for (auto Item : CI.second.BranchCounter) {
       uint64_t SourceAddress = Item.first.first;
       uint64_t TargetAddress = Item.first.second;
+      uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
+      uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
+      TotalSrc += srcinc;
       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrSrc += srcinc;
+      TotalTgt += tgtinc;
       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrTgt += tgtinc;
     }
   }
+
+  if (ErrStkAddr)
+    WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+  if (ErrFuncRange)
+    WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+  if (ErrSrc)
+    WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+  if (ErrTgt)
+    WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
   return true;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 94728ce4abffe..2d9a13b97114c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,6 +257,8 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
+  populateSymbolsFromElf(Obj);
+
   // Disassemble the text sections.
   disassemble(Obj);
 
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
+void ProfiledBinary::populateSymbolsFromElf(
+    const ObjectFile *Obj) {
+  // Load binary functions from ELF symbol table when DWARF info is incomplete
+  StringRef FileName = Obj->getFileName();
+  for (const ELFSymbolRef Symbol : Obj->symbols()) {
+    const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+    const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    const uint64_t Size = Symbol.getSize();
+
+    if (Size == 0 || Type != SymbolRef::ST_Function)
+      continue;
+
+    SmallVector<StringRef> Suffixes(
+      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+    const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+    auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+    auto &Func = Ret.first->second;
+    if (Ret.second)
+      Func.FuncName = Ret.first->first;
+
+    if (auto Range = findFuncRange(Addr)) {
+      if (Ret.second && ShowDetailedWarning)
+        WithColor::warning()
+            << "Symbol " << Name << " start address "
+            << format("%8" PRIx64, Addr) << " already exists in DWARF at "
+            << format("%8" PRIx64, Range->StartAddress) << " in function "
+            << Range->getFuncName() << "\n";
+    } else {
+      // Store/Update Function Range from SymTab
+      Func.Ranges.emplace_back(Addr, Addr + Size);
+
+      auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+      FuncRange &FRange = R.first->second;
+      FRange.Func = &Func;
+      FRange.StartAddress = Addr;
+      FRange.EndAddress = Addr + Size;
+    }
+  }
+}
+
 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
   for (const auto &DieInfo : CompilationUnit.dies()) {
     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5a814b7dbd52d..238c27fbc4c9f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -356,6 +356,9 @@ class ProfiledBinary {
   // Create symbol to its start address mapping.
   void populateSymbolAddressList(const object::ObjectFile *O);
 
+  // Load functions from its symbol table (when DWARF info is missing).
+  void populateSymbolsFromElf(const object::ObjectFile *O);
+
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start a function range is the real entry of the
   // function and also set false to the non-function label.



More information about the llvm-commits mailing list