[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 16:56:18 PDT 2025


================
@@ -820,6 +830,63 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
+void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+  // Load binary functions from symbol table when Debug info is incomplete.
+  // Strip the internal suffixes which are not reflected in the DWARF info.
+  const SmallVector<StringRef, 6> Suffixes(
+      {
+        // Internal suffixes from CoroSplit pass
+        ".cleanup", ".destroy", ".resume",
+        // Internal suffixes from Bolt
+        ".cold", ".warm",
+        // Compiler internal
+        ".llvm.",
+      });
+  StringRef FileName = Obj->getFileName();
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+    const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName);
+    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    uint64_t Size = 0;
+    if (isa<ELFObjectFileBase>(Symbol.getObject())) {
+      ELFSymbolRef ElfSymbol(Symbol);
+      Size = ElfSymbol.getSize();
+    }
+
+    if (Size == 0 || Type != SymbolRef::ST_Function)
+      continue;
+
+    const StringRef SymName =
+        FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+    auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+    auto &Func = Ret.first->second;
+    if (Ret.second) {
+      Func.FuncName = Ret.first->first;
+      Func.FromSymtab = true;
+      HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+    }
+
+    if (auto Range = findFuncRange(StartAddr)) {
+      if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
+        WithColor::warning()
+            << "Conflicting symbol " << Name << " already exists in DWARF as "
+            << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr)
+            << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to "
+            << format("%8" PRIx64, Range->EndAddress) << "\n";
+    } else {
+      // Store/Update Function Range from SymTab
----------------
HighW4y2H3ll wrote:

Actually, we probably still need to "update" the range, because a coroutine is split into several functions ("foo", "foo.destroy", "foo.resume", "foo.cleanup"), and each of them has a different address range in the symbol table. After suffix stripping they all map to the same canonical name, so even when `Ret.second == false` the existing entry could still be a function that was added from the symbol table rather than from DWARF.

Maybe comparing against `StartAddr` is a better way to check whether a function was already found in DWARF? I'm changing the logic to something like this:
```c++
auto Range = findFuncRange(StartAddr);
if (!Range || Range->StartAddress != StartAddr) {
  // Function from symbol table not found previously in DWARF, store ranges.
  ...
} else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
  // Function already found from DWARF, check consistency between symbol table and DWARF.
  WithColor::warning()...
}
```

https://github.com/llvm/llvm-project/pull/163654


More information about the llvm-commits mailing list