[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 15 15:56:32 PDT 2025
https://github.com/HighW4y2H3ll created https://github.com/llvm/llvm-project/pull/163654
Indexing in .debug_str section could lead to integer overflow when in DWARF32 format.
https://github.com/llvm/llvm-project/blob/e61e6251b692ffe71910bad22b82e41313f003cf/llvm/lib/DWP/DWP.cpp#L35C30-L35C47
This can lead to missing symbol names in the DWARF info, and hurts profile quality. As a workaround, we may use information from the symbol table, and recover the missing symbol addresses and ranges.
>From f34ab2d0d5f767d46f31452a8231e56f67ed4a21 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 9 Oct 2025 12:07:13 -0700
Subject: [PATCH] [llvm-profgen] Loading binary functions from .symtab when
DWARF info is incomplete
---
llvm/include/llvm/ProfileData/SampleProf.h | 10 ++++-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 ++++++++++++
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 44 ++++++++++++++++++++
llvm/tools/llvm-profgen/ProfiledBinary.h | 3 ++
4 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 3dd34aba2d716..4adbe13b6712b 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,12 +1214,18 @@ class FunctionSamples {
// Note the sequence of the suffixes in the knownSuffixes array matters.
// If suffix "A" is appended after the suffix "B", "A" should be in front
// of "B" in knownSuffixes.
- const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+ SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+ return getCanonicalFnName(FnName, KnownSuffixes, Attr);
+ }
+
+ static StringRef getCanonicalFnName(StringRef FnName,
+ const SmallVector<StringRef> &Suffixes,
+ StringRef Attr = "selected") {
if (Attr == "" || Attr == "all")
return FnName.split('.').first;
if (Attr == "selected") {
StringRef Cand(FnName);
- for (const auto &Suf : KnownSuffixes) {
+ for (const auto &Suf : Suffixes) {
StringRef Suffix(Suf);
// If the profile contains ".__uniq." suffix, don't strip the
// suffix for names in the IR.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..058b154fc5a57 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,29 +449,56 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
// Go through all the stacks, ranges and branches in sample counters, use
// the start of the range to look up the function it belongs and record the
// function.
+ uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
+ uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
for (const auto &CI : *SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto StackAddr : CtxKey->Context) {
+ uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+ TotalStkAddr += inc;
if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrStkAddr += inc;
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartAddress = Item.first.first;
+ uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
+ TotalFuncRange += inc;
if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrFuncRange += inc;
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceAddress = Item.first.first;
uint64_t TargetAddress = Item.first.second;
+ uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
+ uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
+ TotalSrc += srcinc;
if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrSrc += srcinc;
+ TotalTgt += tgtinc;
if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrTgt += tgtinc;
}
}
+
+ if (ErrStkAddr)
+ WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+ if (ErrFuncRange)
+ WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+ if (ErrSrc)
+ WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+ if (ErrTgt)
+ WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 94728ce4abffe..2d9a13b97114c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,6 +257,8 @@ void ProfiledBinary::load() {
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
+ populateSymbolsFromElf(Obj);
+
// Disassemble the text sections.
disassemble(Obj);
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
}
+void ProfiledBinary::populateSymbolsFromElf(
+ const ObjectFile *Obj) {
+ // Load binary functions from ELF symbol table when DWARF info is incomplete
+ StringRef FileName = Obj->getFileName();
+ for (const ELFSymbolRef Symbol : Obj->symbols()) {
+ const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+ const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+ const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+ const uint64_t Size = Symbol.getSize();
+
+ if (Size == 0 || Type != SymbolRef::ST_Function)
+ continue;
+
+ SmallVector<StringRef> Suffixes(
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+ auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+ auto &Func = Ret.first->second;
+ if (Ret.second)
+ Func.FuncName = Ret.first->first;
+
+ if (auto Range = findFuncRange(Addr)) {
+ if (Ret.second && ShowDetailedWarning)
+ WithColor::warning()
+ << "Symbol " << Name << " start address "
+ << format("%8" PRIx64, Addr) << " already exists in DWARF at "
+ << format("%8" PRIx64, Range->StartAddress) << " in function "
+ << Range->getFuncName() << "\n";
+ } else {
+ // Store/Update Function Range from SymTab
+ Func.Ranges.emplace_back(Addr, Addr + Size);
+
+ auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+ FuncRange &FRange = R.first->second;
+ FRange.Func = &Func;
+ FRange.StartAddress = Addr;
+ FRange.EndAddress = Addr + Size;
+ }
+ }
+}
+
void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
for (const auto &DieInfo : CompilationUnit.dies()) {
llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5a814b7dbd52d..238c27fbc4c9f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -356,6 +356,9 @@ class ProfiledBinary {
// Create symbol to its start address mapping.
void populateSymbolAddressList(const object::ObjectFile *O);
+ // Load functions from its symbol table (when DWARF info is missing).
+ void populateSymbolsFromElf(const object::ObjectFile *O);
+
// A function may be spilt into multiple non-continuous address ranges. We use
// this to set whether start a function range is the real entry of the
// function and also set false to the non-function label.
More information about the llvm-commits
mailing list