[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 21 13:31:31 PST 2025


================
@@ -820,6 +835,93 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
+void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
+  // Load binary functions from symbol table when Debug info is incomplete.
+  // Strip the internal suffixes which are not reflected in the DWARF info.
+  const SmallVector<StringRef, 10> Suffixes(
+      {// Internal suffixes from CoroSplit pass
+       ".cleanup", ".destroy", ".resume",
+       // Internal suffixes from Bolt
+       ".cold", ".warm",
+       // Compiler/LTO internal
+       ".llvm.", ".part.", ".isra.", ".constprop.", ".lto_priv."});
+  StringRef FileName = Obj->getFileName();
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+    const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName);
+    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    uint64_t Size = 0;
+    if (isa<ELFObjectFileBase>(Symbol.getObject())) {
+      ELFSymbolRef ElfSymbol(Symbol);
+      Size = ElfSymbol.getSize();
+    }
+
+    if (Size == 0 || Type != SymbolRef::ST_Function)
+      continue;
+
+    const uint64_t EndAddr = StartAddr + Size;
+    const StringRef SymName =
+        FunctionSamples::getCanonicalFnName(Name, Suffixes);
+    assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress());
+
+    auto Range = findFuncRange(StartAddr);
+    if (!Range) {
+      assert(findFuncRange(EndAddr - 1) == nullptr);
+      // Function from symbol table not found previously in DWARF, store ranges.
+      auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+      auto &Func = Ret.first->second;
+      if (Ret.second) {
+        Func.FuncName = Ret.first->first;
+        HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+      }
+
+      Func.HasSymtabName = true;
+      Func.Ranges.emplace_back(StartAddr, EndAddr);
+
+      auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
+      FuncRange &FRange = R.first->second;
+
+      FRange.Func = &Func;
+      FRange.StartAddress = StartAddr;
+      FRange.EndAddress = EndAddr;
+
+    } else if (SymName != Range->getFuncName()) {
+      // Function range already found from DWARF, but the symbol name from
+      // symbol table is inconsistent with debug info. Log this discrepancy and
+      // the alternative function GUID.
+      if (ShowDetailedWarning)
+        WithColor::warning()
+            << "Conflicting name for symbol " << Name << " with range ("
+            << format("%8" PRIx64, StartAddr) << ", "
+            << format("%8" PRIx64, EndAddr) << ")"
+            << ", but the DWARF symbol " << Range->getFuncName()
+            << " indicates an overlapping range ("
+            << format("%8" PRIx64, Range->StartAddress) << ", "
+            << format("%8" PRIx64, Range->EndAddress) << ")\n";
+
+      assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
+             "Mismatched function range");
+
+      Range->Func->HasSymtabName = true;
----------------
HighW4y2H3ll wrote:

Thanks! Updated with this in the new commit! :)

https://github.com/llvm/llvm-project/pull/163654


More information about the llvm-commits mailing list