[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 20:41:48 PST 2025
https://github.com/HighW4y2H3ll updated https://github.com/llvm/llvm-project/pull/163654
>From f34ab2d0d5f767d46f31452a8231e56f67ed4a21 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 9 Oct 2025 12:07:13 -0700
Subject: [PATCH 01/24] [llvm-profgen] Loading binary functions from .symtab
when DWARF info is incomplete
---
llvm/include/llvm/ProfileData/SampleProf.h | 10 ++++-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 ++++++++++++
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 44 ++++++++++++++++++++
llvm/tools/llvm-profgen/ProfiledBinary.h | 3 ++
4 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 3dd34aba2d716..4adbe13b6712b 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,12 +1214,18 @@ class FunctionSamples {
// Note the sequence of the suffixes in the knownSuffixes array matters.
// If suffix "A" is appended after the suffix "B", "A" should be in front
// of "B" in knownSuffixes.
- const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+ SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+ return getCanonicalFnName(FnName, KnownSuffixes, Attr);
+ }
+
+ static StringRef getCanonicalFnName(StringRef FnName,
+ const SmallVector<StringRef> &Suffixes,
+ StringRef Attr = "selected") {
if (Attr == "" || Attr == "all")
return FnName.split('.').first;
if (Attr == "selected") {
StringRef Cand(FnName);
- for (const auto &Suf : KnownSuffixes) {
+ for (const auto &Suf : Suffixes) {
StringRef Suffix(Suf);
// If the profile contains ".__uniq." suffix, don't strip the
// suffix for names in the IR.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..058b154fc5a57 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,29 +449,56 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
// Go through all the stacks, ranges and branches in sample counters, use
// the start of the range to look up the function it belongs and record the
// function.
+ uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
+ uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
for (const auto &CI : *SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto StackAddr : CtxKey->Context) {
+ uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+ TotalStkAddr += inc;
if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrStkAddr += inc;
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartAddress = Item.first.first;
+ uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
+ TotalFuncRange += inc;
if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrFuncRange += inc;
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceAddress = Item.first.first;
uint64_t TargetAddress = Item.first.second;
+ uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
+ uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
+ TotalSrc += srcinc;
if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrSrc += srcinc;
+ TotalTgt += tgtinc;
if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
ProfiledFunctions.insert(FRange->Func);
+ else
+ ErrTgt += tgtinc;
}
}
+
+ if (ErrStkAddr)
+ WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+ if (ErrFuncRange)
+ WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+ if (ErrSrc)
+ WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+ if (ErrTgt)
+ WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 94728ce4abffe..2d9a13b97114c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,6 +257,8 @@ void ProfiledBinary::load() {
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
+ populateSymbolsFromElf(Obj);
+
// Disassemble the text sections.
disassemble(Obj);
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
}
+void ProfiledBinary::populateSymbolsFromElf(
+ const ObjectFile *Obj) {
+ // Load binary functions from ELF symbol table when DWARF info is incomplete
+ StringRef FileName = Obj->getFileName();
+ for (const ELFSymbolRef Symbol : Obj->symbols()) {
+ const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+ const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+ const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+ const uint64_t Size = Symbol.getSize();
+
+ if (Size == 0 || Type != SymbolRef::ST_Function)
+ continue;
+
+ SmallVector<StringRef> Suffixes(
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+ auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+ auto &Func = Ret.first->second;
+ if (Ret.second)
+ Func.FuncName = Ret.first->first;
+
+ if (auto Range = findFuncRange(Addr)) {
+ if (Ret.second && ShowDetailedWarning)
+ WithColor::warning()
+ << "Symbol " << Name << " start address "
+ << format("%8" PRIx64, Addr) << " already exists in DWARF at "
+ << format("%8" PRIx64, Range->StartAddress) << " in function "
+ << Range->getFuncName() << "\n";
+ } else {
+ // Store/Update Function Range from SymTab
+ Func.Ranges.emplace_back(Addr, Addr + Size);
+
+ auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+ FuncRange &FRange = R.first->second;
+ FRange.Func = &Func;
+ FRange.StartAddress = Addr;
+ FRange.EndAddress = Addr + Size;
+ }
+ }
+}
+
void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
for (const auto &DieInfo : CompilationUnit.dies()) {
llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5a814b7dbd52d..238c27fbc4c9f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -356,6 +356,9 @@ class ProfiledBinary {
// Create symbol to its start address mapping.
void populateSymbolAddressList(const object::ObjectFile *O);
+ // Load functions from its symbol table (when DWARF info is missing).
+ void populateSymbolsFromElf(const object::ObjectFile *O);
+
// A function may be spilt into multiple non-continuous address ranges. We use
// this to set whether start a function range is the real entry of the
// function and also set false to the non-function label.
>From 0fd352d28316691d97c245f80fe8205309c3b253 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 15 Oct 2025 16:25:56 -0700
Subject: [PATCH 02/24] formatting
---
llvm/include/llvm/ProfileData/SampleProf.h | 2 +-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 13 +++++++++----
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++++----
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 4adbe13b6712b..dadf718d0b904 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,7 +1214,7 @@ class FunctionSamples {
// Note the sequence of the suffixes in the knownSuffixes array matters.
// If suffix "A" is appended after the suffix "B", "A" should be in front
// of "B" in knownSuffixes.
- SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+ SmallVector<StringRef> KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix});
return getCanonicalFnName(FnName, KnownSuffixes, Attr);
}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 058b154fc5a57..0478d5568085a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -492,13 +492,18 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
}
if (ErrStkAddr)
- WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+ WithColor::warning() << "Cannot find Stack Address from DWARF Info: "
+ << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
if (ErrFuncRange)
- WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+ WithColor::warning() << "Cannot find Function Range from DWARF Info: "
+ << ErrFuncRange << "/" << TotalFuncRange
+ << " missing\n";
if (ErrSrc)
- WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+ WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: "
+ << ErrSrc << "/" << TotalSrc << " missing\n";
if (ErrTgt)
- WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
+ WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: "
+ << ErrTgt << "/" << TotalTgt << " missing\n";
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2d9a13b97114c..aa385c0db50db 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -822,8 +822,7 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
}
-void ProfiledBinary::populateSymbolsFromElf(
- const ObjectFile *Obj) {
+void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) {
// Load binary functions from ELF symbol table when DWARF info is incomplete
StringRef FileName = Obj->getFileName();
for (const ELFSymbolRef Symbol : Obj->symbols()) {
@@ -836,8 +835,9 @@ void ProfiledBinary::populateSymbolsFromElf(
continue;
SmallVector<StringRef> Suffixes(
- {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
- const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ const StringRef SymName =
+ FunctionSamples::getCanonicalFnName(Name, Suffixes);
auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
auto &Func = Ret.first->second;
>From c097d374402f11fe00d997495b10a834ff6a4d9e Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 16 Oct 2025 10:59:47 -0700
Subject: [PATCH 03/24] Fix branch target check when an instruction branches to
itself. (i.e. jmp 0)
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index aa385c0db50db..2ceeba28f77a8 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -606,13 +606,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
// Record potential call targets for tail frame inference later-on.
if (InferMissingFrames && FRange) {
uint64_t Target = 0;
- MIA->evaluateBranch(Inst, Address, Size, Target);
+ bool Err = MIA->evaluateBranch(Inst, Address, Size, Target);
if (MCDesc.isCall()) {
// Indirect call targets are unknown at this point. Recording the
// unknown target (zero) for further LBR-based refinement.
MissingContextInferrer->CallEdges[Address].insert(Target);
} else if (MCDesc.isUnconditionalBranch()) {
- assert(Target &&
+ assert(Err &&
"target should be known for unconditional direct branch");
// Any inter-function unconditional jump is considered tail call at
// this point. This is not 100% accurate and could further be
>From a19064d73c04e68757b2cf1323c78b40b649f75f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 16 Oct 2025 22:36:53 -0700
Subject: [PATCH 04/24] Making the API compatible with non-ELF binaries
---
llvm/include/llvm/Object/ObjectFile.h | 5 +++++
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++++----
llvm/tools/llvm-profgen/ProfiledBinary.h | 2 +-
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 289cc770e3466..6ceedd2d310f7 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -198,6 +198,7 @@ class SymbolRef : public BasicSymbolRef {
/// Get the alignment of this symbol as the actual value (not log 2).
uint32_t getAlignment() const;
uint64_t getCommonSize() const;
+ uint64_t getSize() const;
Expected<SymbolRef::Type> getType() const;
/// Get section this symbol is defined in reference to. Result is
@@ -482,6 +483,10 @@ inline uint64_t SymbolRef::getCommonSize() const {
return getObject()->getCommonSymbolSize(getRawDataRefImpl());
}
+inline uint64_t SymbolRef::getSize() const {
+ return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl());
+}
+
inline Expected<section_iterator> SymbolRef::getSection() const {
return getObject()->getSymbolSection(getRawDataRefImpl());
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2ceeba28f77a8..c9561aa9cfb3c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,7 +257,7 @@ void ProfiledBinary::load() {
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
- populateSymbolsFromElf(Obj);
+ populateSymbolsFromBinary(Obj);
// Disassemble the text sections.
disassemble(Obj);
@@ -822,10 +822,10 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
}
-void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) {
- // Load binary functions from ELF symbol table when DWARF info is incomplete
+void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+ // Load binary functions from symbol table when Debug info is incomplete
StringRef FileName = Obj->getFileName();
- for (const ELFSymbolRef Symbol : Obj->symbols()) {
+ for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 238c27fbc4c9f..e73ffd3143e3d 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -357,7 +357,7 @@ class ProfiledBinary {
void populateSymbolAddressList(const object::ObjectFile *O);
// Load functions from its symbol table (when DWARF info is missing).
- void populateSymbolsFromElf(const object::ObjectFile *O);
+ void populateSymbolsFromBinary(const object::ObjectFile *O);
// A function may be spilt into multiple non-continuous address ranges. We use
// this to set whether start a function range is the real entry of the
>From e12e694c1b9e3563dd8351e225b7acec05e12d5a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 20 Oct 2025 09:40:57 -0700
Subject: [PATCH 05/24] Fix
---
llvm/include/llvm/ProfileData/SampleProf.h | 9 ++--
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 43 ++++++++++----------
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 +-
3 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index dadf718d0b904..6de5884253017 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,19 +1214,18 @@ class FunctionSamples {
// Note the sequence of the suffixes in the knownSuffixes array matters.
// If suffix "A" is appended after the suffix "B", "A" should be in front
// of "B" in knownSuffixes.
- SmallVector<StringRef> KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix});
+ const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr};
return getCanonicalFnName(FnName, KnownSuffixes, Attr);
}
- static StringRef getCanonicalFnName(StringRef FnName,
- const SmallVector<StringRef> &Suffixes,
+ static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[],
StringRef Attr = "selected") {
if (Attr == "" || Attr == "all")
return FnName.split('.').first;
if (Attr == "selected") {
StringRef Cand(FnName);
- for (const auto &Suf : Suffixes) {
- StringRef Suffix(Suf);
+ for (const char **Suf = Suffixes; *Suf; Suf++) {
+ StringRef Suffix(*Suf);
// If the profile contains ".__uniq." suffix, don't strip the
// suffix for names in the IR.
if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0478d5568085a..2f6f50912fbcf 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -454,56 +454,57 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
for (const auto &CI : *SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto StackAddr : CtxKey->Context) {
- uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
- TotalStkAddr += inc;
+ uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+ TotalStkAddr += Inc;
if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
ProfiledFunctions.insert(FRange->Func);
else
- ErrStkAddr += inc;
+ ErrStkAddr += Inc;
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartAddress = Item.first.first;
- uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
- TotalFuncRange += inc;
+ uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0;
+ TotalFuncRange += Inc;
if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
ProfiledFunctions.insert(FRange->Func);
else
- ErrFuncRange += inc;
+ ErrFuncRange += Inc;
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceAddress = Item.first.first;
uint64_t TargetAddress = Item.first.second;
- uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
- uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
- TotalSrc += srcinc;
+ uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0;
+ uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0;
+ TotalSrc += SrcInc;
if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
ProfiledFunctions.insert(FRange->Func);
else
- ErrSrc += srcinc;
- TotalTgt += tgtinc;
+ ErrSrc += SrcInc;
+ TotalTgt += TgtInc;
if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
ProfiledFunctions.insert(FRange->Func);
else
- ErrTgt += tgtinc;
+ ErrTgt += TgtInc;
}
}
if (ErrStkAddr)
- WithColor::warning() << "Cannot find Stack Address from DWARF Info: "
- << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+ emitWarningSummary(
+ ErrStkAddr, TotalStkAddr,
+ "of stack address samples do not belong to any function");
if (ErrFuncRange)
- WithColor::warning() << "Cannot find Function Range from DWARF Info: "
- << ErrFuncRange << "/" << TotalFuncRange
- << " missing\n";
+ emitWarningSummary(
+ ErrFuncRange, TotalFuncRange,
+ "of function range samples do not belong to any function");
if (ErrSrc)
- WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: "
- << ErrSrc << "/" << TotalSrc << " missing\n";
+ emitWarningSummary(ErrSrc, TotalSrc,
+ "of LBR source samples do not belong to any function");
if (ErrTgt)
- WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: "
- << ErrTgt << "/" << TotalTgt << " missing\n";
+ emitWarningSummary(ErrTgt, TotalTgt,
+ "of LBR target samples do not belong to any function");
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index c9561aa9cfb3c..1dab93fc871d2 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -834,8 +834,8 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
if (Size == 0 || Type != SymbolRef::ST_Function)
continue;
- SmallVector<StringRef> Suffixes(
- {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
+ ".cold", ".warm", nullptr};
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
>From 5eead6b006e3a223b52b29078d449d3cf1a137d8 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 22 Oct 2025 16:29:30 -0700
Subject: [PATCH 06/24] Clean up getSymbolSize API and warnings
---
llvm/include/llvm/Object/ELFObjectFile.h | 6 ++++++
llvm/include/llvm/Object/ObjectFile.h | 3 ++-
llvm/tools/llvm-profgen/PerfReader.cpp | 11 +----------
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++--
4 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ced1afdd4cc6a..cb7e6ef3458a9 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -310,6 +310,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+ uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override;
Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
uint8_t getSymbolBinding(DataRefImpl Symb) const override;
uint8_t getSymbolOther(DataRefImpl Symb) const override;
@@ -703,6 +704,11 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
return getSymbolSize(Symb);
}
+template <class ELFT>
+uint64_t ELFObjectFile<ELFT>::getSymbolSizeImpl(DataRefImpl Symb) const {
+ return getSymbolSize(Symb);
+}
+
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 6ceedd2d310f7..bea61cf7c2214 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -256,6 +256,7 @@ class LLVM_ABI ObjectFile : public SymbolicFile {
virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
+ virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; }
virtual Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const = 0;
virtual Expected<section_iterator>
getSymbolSection(DataRefImpl Symb) const = 0;
@@ -484,7 +485,7 @@ inline uint64_t SymbolRef::getCommonSize() const {
}
inline uint64_t SymbolRef::getSize() const {
- return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl());
+ return getObject()->getSymbolSizeImpl(getRawDataRefImpl());
}
inline Expected<section_iterator> SymbolRef::getSection() const {
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 183b248a72320..e9f7b666c95c7 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1274,8 +1274,6 @@ void PerfScriptReader::warnInvalidRange() {
const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
"likely due to profile and binary mismatch.";
- const char *DanglingRangeMsg = "Range does not belong to any functions, "
- "likely from PLT, .init or .fini section.";
const char *RangeCrossFuncMsg =
"Fall through range should not cross function boundaries, likely due to "
"profile and binary mismatch.";
@@ -1283,7 +1281,6 @@ void PerfScriptReader::warnInvalidRange() {
uint64_t TotalRangeNum = 0;
uint64_t InstNotBoundary = 0;
- uint64_t UnmatchedRange = 0;
uint64_t RangeCrossFunc = 0;
uint64_t BogusRange = 0;
@@ -1303,11 +1300,8 @@ void PerfScriptReader::warnInvalidRange() {
}
auto *FRange = Binary->findFuncRange(StartAddress);
- if (!FRange) {
- UnmatchedRange += I.second;
- WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
+ if (!FRange)
continue;
- }
if (EndAddress >= FRange->EndAddress) {
RangeCrossFunc += I.second;
@@ -1325,9 +1319,6 @@ void PerfScriptReader::warnInvalidRange() {
emitWarningSummary(
InstNotBoundary, TotalRangeNum,
"of samples are from ranges that are not on instruction boundary.");
- emitWarningSummary(
- UnmatchedRange, TotalRangeNum,
- "of samples are from ranges that do not belong to any functions.");
emitWarningSummary(
RangeCrossFunc, TotalRangeNum,
"of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 1dab93fc871d2..469d31d95c00a 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -824,6 +824,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
// Load binary functions from symbol table when Debug info is incomplete
+ const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
+ ".cold", ".warm", nullptr};
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -834,8 +836,6 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
if (Size == 0 || Type != SymbolRef::ST_Function)
continue;
- const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
- ".cold", ".warm", nullptr};
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
>From 8f59bfa035070c3cf638f696730156d26a5165fe Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 22 Oct 2025 20:12:53 -0700
Subject: [PATCH 07/24] Add cmdline option --load-function-from-symbol
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 469d31d95c00a..4a88c2becf133 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -65,6 +65,13 @@ static cl::list<std::string> DisassembleFunctions(
"names only. Only work with show-disassembly-only"),
cl::cat(ProfGenCategory));
+static cl::opt<bool>
+ LoadFunctionFromSymbol("load-function-from-symbol",
+ cl::desc("Gather additional binary function info "
+ "from symbols (e.g. .symtab) in case "
+ "dwarf info is incomplete."),
+ cl::cat(ProfGenCategory));
+
static cl::opt<bool>
KernelBinary("kernel",
cl::desc("Generate the profile for Linux kernel binary."),
@@ -257,7 +264,8 @@ void ProfiledBinary::load() {
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
- populateSymbolsFromBinary(Obj);
+ if (LoadFunctionFromSymbol || UsePseudoProbes)
+ populateSymbolsFromBinary(Obj);
// Disassemble the text sections.
disassemble(Obj);
>From a967994990b9bdd9dcd35c9739d1a8442c694d0a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 23 Oct 2025 10:25:29 -0700
Subject: [PATCH 08/24] Get symbol size only for ELFObjectFile
---
llvm/include/llvm/Object/ELFObjectFile.h | 6 ------
llvm/include/llvm/Object/ObjectFile.h | 6 ------
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 +++++-
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index cb7e6ef3458a9..ced1afdd4cc6a 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -310,7 +310,6 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
- uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override;
Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
uint8_t getSymbolBinding(DataRefImpl Symb) const override;
uint8_t getSymbolOther(DataRefImpl Symb) const override;
@@ -704,11 +703,6 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
return getSymbolSize(Symb);
}
-template <class ELFT>
-uint64_t ELFObjectFile<ELFT>::getSymbolSizeImpl(DataRefImpl Symb) const {
- return getSymbolSize(Symb);
-}
-
template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index bea61cf7c2214..289cc770e3466 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -198,7 +198,6 @@ class SymbolRef : public BasicSymbolRef {
/// Get the alignment of this symbol as the actual value (not log 2).
uint32_t getAlignment() const;
uint64_t getCommonSize() const;
- uint64_t getSize() const;
Expected<SymbolRef::Type> getType() const;
/// Get section this symbol is defined in reference to. Result is
@@ -256,7 +255,6 @@ class LLVM_ABI ObjectFile : public SymbolicFile {
virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
- virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; }
virtual Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const = 0;
virtual Expected<section_iterator>
getSymbolSection(DataRefImpl Symb) const = 0;
@@ -484,10 +482,6 @@ inline uint64_t SymbolRef::getCommonSize() const {
return getObject()->getCommonSymbolSize(getRawDataRefImpl());
}
-inline uint64_t SymbolRef::getSize() const {
- return getObject()->getSymbolSizeImpl(getRawDataRefImpl());
-}
-
inline Expected<section_iterator> SymbolRef::getSection() const {
return getObject()->getSymbolSection(getRawDataRefImpl());
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 4a88c2becf133..93f605a891afc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -839,7 +839,11 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
- const uint64_t Size = Symbol.getSize();
+ uint64_t Size = 0;
+ if (isa<ELFObjectFileBase>(Symbol.getObject())) {
+ ELFSymbolRef ElfSymbol(Symbol);
+ Size = ElfSymbol.getSize();
+ }
if (Size == 0 || Type != SymbolRef::ST_Function)
continue;
>From 0dc2c669f3639fb91545288ba76647deec172978 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 23 Oct 2025 14:49:54 -0700
Subject: [PATCH 09/24] Add unit test
---
.../llvm-profgen/Inputs/missing-dwarf.exe | Bin 0 -> 18744 bytes
.../tools/llvm-profgen/missing-dwarf.test | 40 ++++++++++++++++++
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++--
3 files changed, 45 insertions(+), 3 deletions(-)
create mode 100755 llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe
create mode 100644 llvm/test/tools/llvm-profgen/missing-dwarf.test
diff --git a/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe
new file mode 100755
index 0000000000000000000000000000000000000000..c4b8af0bf1f2ab87e195231a326fc756a7b46f23
GIT binary patch
literal 18744
zcmeHPYiu0V6+W|LI}Rc7CJ+d9h^OUIN2+IcZO5?<NyhPOU7X;?CIpEvncW%N3+vtO
z?rgA8MJ)n|&?31=rASDv`lAx6_|XUI;|HoCh_rtYP*t=dQbXIQNR&3Hr46*o_MAKS
ztaoR3L)1!brOuUS&pqEg-??|row;-8as9>7$+3{40H+Y_1=6&zAt3J;pnsoGCDsoc
z!GKQqG_)YngqtBsNP0PzzYKW=EL)|WijsKSC?lj%CASL=fF-w(B=OdgA+w%rZT3`_
z5cYK9i6W_k+&`8RvI3TsJW-$K`2o%FGE%-QVV0FVijARr2F6}PWLTb&^_`WHc@!?<
zO-bIA<gr|l_AEJ`)JEF+Wjy;;12xh;sgZ(76Tbj{-Vw<=BJEkSJxi*C()2mk{#WXo
zlDx1c+(1~yk>zJR7+7+BpF<wS<#H2u^C4MZqkdRbJdZ4;3Xyaw(bpSE+g<5YrnuO(
z*x%RH*Q*z at dbc1@yQmI2rp6E60ik7BB}|4<^aYiZKPsn0Wz at fv2<J at zm-$1SEp===
z5!346BMtEB1~`}|?s?KIxK`dZ7pznUNT(g|gYn6U{ljLr-qQ=_#LNNHcJj_#s^B{L
znFGV=Y{r?f5@`or%q?Uy(uL`n)-}X7ti?ltt5D~Dd;Tj=VRwh%dc-K5-De0%#{k8E
z(>oP7`=YY*6*%_^;TI}!KDG!CQV at Y40zm|V2m}!bA`nC%h(Hj5|Az?txoyWs at e_Y&
zjen{6?Fc~pkySTTdM$q9<<?VT_o+Pg-o(k*?}?xMP5i{~&QHyZ##UoLi=RHNZ8o5^
z^%-RSWma$Nc(`nTYE4~<Ke6Y><oAi@?~xd|;BLjLGP0`mrL%1v%S1dSCEA}9_PZV;
zvGZd5<oWoEf7};;@q!vxUWvbc(Y+cS?2!&yOJ`@>I`Afk`PAO!JwsRw#jEa!pFH=@
z1Eo^wmF5T{%F$CRr)Jk0*VbVG*}wbv8b*{K(GA8?^oV{*$M}R#kN$(2`OX>i7F)&m
zgpZyo$F3YLG6_-;fgl1w1cC?z5eOm>L?DPj5P={9K?H&b1QGb(i-1yo1Lw6(X;*Kd
zcdztrEPTFHila18-c7~tmr6OpQLd1V>1t1a^1vZb7Q;&WmFrsR-88+&+m74npGu{h
z$!26-n6M~rXAqOG>4gp6X2WAx#|D%zv?=^(Xn14G7&=0geAu`xVmal@<#&4J1O+LG
zKoEf-0zm|V2m}!bA`nC%h(Hj5AOb-I{sR%fm%x->liySGx0&=UC#5Srfj5MP$ZeGR
zt&(@0)cL#UcB%9C$)AvVe6JyL;vNulT>inuQkDu9RfB~0jW^10lR>KZmYU?#viu5C
zFfDij#62hGw at H1glq}ZC%(K!T*LzIj>!oD=#gf;a$D}>JE*9<MxZrK9&>xiLlKx*4
z{9Spy28<664{DwG_3}<F7Sng>QLQ`L-5cE<8_+rrIkpzJTv^Z+i|vGD+RDsn$DMp3
zmCb0~-Fj4y&ZS(fbKZ4xg~3P!mHA>qPi7Y)>GW~$rz at Aw-s>b?oh+?^zTE=@y#oW$
z#6ZHf2kfNXvnx5UyJx_TS^d4e-O0XzUOQ^-grUQOy}4NTKpz}BJeV(LGAXQjE}8T<
z6!sq;v@)4&B9*CIJbve3bg{?oj_&HSG+mDrmI}6$D at 5>f?}(L1b!D=yqbFf{YOn{U
zhX-RYJT};W3B$3;!LHaP%EyP$$J7LQ8y$>7Y8rp4+K|emT&z%3BlDh5sZ<%`N7`;G
z*_C(F*(8&Em`C=3&%Lnit;v at z+>Hlki8@}L&1OIP#S_2pv%mV5hbhoR-+nU~0%f@^
z^bjynLA^<wgY-;?KFA&)x>4z|V&!!sWAc2b^Tls-hb&`R&xXf~YcAo3$B+>@melie
zO!8YUVZ(k{*7L(<EbG~DTiY+uW^U_*Y|o7tpJr$xcko91RrBpP-g`rg6#`MjIta=t
zpM~(X`D@#6dVc?P$8Xx)zNMvbEy6U6HDgEYCKE;Z`=*T~$N_Gc=@7nDr6bhVyh9Nm
z$p7>n{rq!}+<W40<J;fd-ScuBpY*K{g=KS;X7!5OS~eVMhL-!ARcLAY+Vb+Rlq=5N
z)v`ekwV+2G7E*-*o^4(B{Z at Wfx9`u2F`gR1PIPdc0v*LP`16B4s{50;c^~O_VvdIP
zC?0vRlQLqd0cL=9Yt5(^wUt!HrIL?JD(>_))u=I3(O-XET!kZb at uu?iq|%v+JHywJ
zYP_X<y{X36LA`Na#aQw6q1t{u at O7pd$E%h+rmFD`<@2o?zY_R3t;X@{s~MG$$g8la
zybh<bP=z)y>e`2auh-S~n}M&>)%X^ucb!pj#RsqJsJ6$a%7#%1iM$G1VOw4N8rWVJ
zr(>x$spYGEb(so08}Z{j3-ROapdB-+LDbaps<rAOwY-|ekMF1kq^3eg16*r>(+O6a
zRQOaah=>X|G{9+8s!j^3P51(!8U<V}NJGuqd8A2$ILdnV(};&~*2UwpGzVHCMfi&E
zIG>j|&o9~jLCh$y*6-9lN{{2FK<#n(UBt;h$bpV*DZPN3f>$X2 at q7mHdhKttU%wT8
z+F(^PFjt-G`FS5r6sSEfJ`&X0^Km2Yph9n3e8>3>h;ON`PG|<=S{?j8f!Cf#cOxFg
z^UHr8B at y2esvS>K(%yeQ9vA*=D}Ds=&7qCZ>Wj~lgyU*b9+&WHB+ at q#--7+c*A$+o
ze+%(2_Pgeb!?(#kv;n5%@xzy$tn{Nv{ATI at CF#e%zpt{rJWlw2<L8KP4sETe_t%JP
z(r2akko<(|jN6f7Ay117BRE^7X{OoNixW6|-g3Q_hLKn-f)mm5pSKP%nRjD)7FX8c
zq||h+IY6M`7V%@&B&@B%Gu;I<N$c<mXlZB7xpX#RrA^z-<_o4(Tm+nY=hBYr*m|_S
zg*l7!Ow-EettHdRxcMcR&07nOX%`n3mXK1RnKUD<GL=_-(;PcAbYRpRJvd_G>c5e@
z4h|id7)CPQNtmN?SsEWX1m^hU?fZu&&D+PurblPYnW6oYqsZ{DKr}9Q-JxS!t_5Nr
zX!lU at 79pZiy&}=H3t4mC%Gk78apHEgvQrteSa58h2?_4H4W^wo#mw4r9dd&+Z8Mjm
z)s2ZlLHZSw7SqJlkG!|EEVAxeM-eUny|A?4S_zbH-jnm3!#&w(!PIp#7&9dC_t
zPTpOT(9r&gF6w%n%I2-YJm~gP2K{-`&3h&C2CPaku?+XcA_C~taiF`-BK}f;V_8;2
zPIu;I56|1kV9jHAUF|VAkM696R1)jTqA&c>b7{<Y&M-_sH+u_sAb~FL!o=Poo+~&-
zJVJ376L at qv1@ut~d96<rQ)yebTsNOe6!GXn&rYH^hoPL!a^6ZL^3HMAaIvn#d5kM(
zEt~eoBa;bsUs=bjnVp?2IBwZL2H4CM6S!|{bxO=#$|3geCwk6-Lj+!vyu|ZXUJuXX
zjw}|ooF_?FZhAJse4gL(`fm-(DpPBvg84q&D)~GQ=5^qj&z{F+hDR{-SA3Si^)IT1
zC|XoK3vNG;!~XgYq8W|p%zs+)pO*YZ&y9F~Lvdn0KW90N7 at 0DkpZA1iL;3lRbW;<*
zd$53>%`l(m=?_XiKPTe$vmML(kWbHgSm*itW0J4Qid#@twqL^V0A|RP`8 at A`R`QQY
zL$2Sy{|{q1`6d4%@w`dgYy<FfG%l8IS(WpoH9lV-HOb#j&MT6H%g74mD(mO_jaxT+
zCU`}!O8)wv!cyY%{`2)8G>_)5kN4jz1^7B<D*62X1Ni?6 at c$2Bdw>1U`S^xr2vb87
z0w}(f$zT5uuy7M@@^CHBANl_b`0ZI|`4V#5D)@Zg6qo<^f$h*lB-UBJiYWCx*U$G?
z{C^|(|3mQc!90%Zo5-d3F`w_l&PEW#q>10PALg^9`Eez`JTHxU96nFk1M^s(^YM8e
zxlQu@{Y7<Ergu>xU)+A4$8En%{(0n*Pic-hFERfa2BhTrS$>FkJ^l}+!@X?SfZvMc
zJ`-X2LCho2Gm*;Vw{4gF<w_&W at 96e`>m|<fW<P%?0{m>7O!+?cR>>dn5&ZqJPu4H1
zDU0PEFWXs0yo}Es+}42qtmOOe-2ME!8t{K2`Jexo`W?yl_kWZgIAOv&t*XTJv&=Q9
ze?;=bRqWcn{z8NLXC$BBE3g}`m*tlm)PM9c>;Far{(|J+#1%`)^)meZ2K*-_-+%q_
z^GPp{{p=LwcO-vR4qP`QMspP&H+Wp3`0%-p*>cL=;$=&|3VkZxbJpi)_jvr%K7vn$
Fe*(I&+5G?j
literal 0
HcmV?d00001
diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
new file mode 100644
index 0000000000000..0f0b8bc30de47
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -0,0 +1,40 @@
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: cd %t
+
+; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof
+; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe
+
+; Test --load-function-from-symbol=0
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
+
+; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
+
+; Test --load-function-from-symbol=1
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB
+
+; CHECK-LOAD-SYMTAB: main:2:1
+; CHECK-LOAD-SYMTAB-NEXT: 1: 1
+; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1
+; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951
+; CHECK-LOAD-SYMTAB-NEXT: foo:0:0
+; CHECK-LOAD-SYMTAB-NEXT: 1: 0
+; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295
+
+; Build instructions:
+; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c
+; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g
+
+; Source code:
+
+int foo() {
+ return 1;
+}
+
+int main() {
+ foo();
+ return 0;
+}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 93f605a891afc..f601fc133df1c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -66,7 +66,7 @@ static cl::list<std::string> DisassembleFunctions(
cl::cat(ProfGenCategory));
static cl::opt<bool>
- LoadFunctionFromSymbol("load-function-from-symbol",
+ LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
cl::desc("Gather additional binary function info "
"from symbols (e.g. .symtab) in case "
"dwarf info is incomplete."),
@@ -264,7 +264,7 @@ void ProfiledBinary::load() {
if (ShowDisassemblyOnly)
decodePseudoProbe(Obj);
- if (LoadFunctionFromSymbol || UsePseudoProbes)
+ if (LoadFunctionFromSymbol && UsePseudoProbes)
populateSymbolsFromBinary(Obj);
// Disassemble the text sections.
@@ -853,8 +853,10 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
auto &Func = Ret.first->second;
- if (Ret.second)
+ if (Ret.second) {
Func.FuncName = Ret.first->first;
+ HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+ }
if (auto Range = findFuncRange(Addr)) {
if (Ret.second && ShowDetailedWarning)
>From 75dc996056424a43aa3b68be63f14cdf106ba340 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 24 Oct 2025 16:16:40 -0700
Subject: [PATCH 10/24] Nit
---
llvm/include/llvm/ProfileData/SampleProf.h | 9 +++++----
llvm/test/tools/llvm-profgen/missing-dwarf.test | 5 ++---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 6de5884253017..dd0495f29e8c3 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,18 +1214,19 @@ class FunctionSamples {
// Note the sequence of the suffixes in the knownSuffixes array matters.
// If suffix "A" is appended after the suffix "B", "A" should be in front
// of "B" in knownSuffixes.
- const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr};
+ const SmallVector<StringRef> KnownSuffixes{LLVMSuffix, PartSuffix,
+ UniqSuffix};
return getCanonicalFnName(FnName, KnownSuffixes, Attr);
}
- static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[],
+ static StringRef getCanonicalFnName(StringRef FnName,
+ ArrayRef<StringRef> Suffixes,
StringRef Attr = "selected") {
if (Attr == "" || Attr == "all")
return FnName.split('.').first;
if (Attr == "selected") {
StringRef Cand(FnName);
- for (const char **Suf = Suffixes; *Suf; Suf++) {
- StringRef Suffix(*Suf);
+ for (const auto Suffix : Suffixes) {
// If the profile contains ".__uniq." suffix, don't strip the
// suffix for names in the IR.
if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
index 0f0b8bc30de47..0fc0d660133f1 100644
--- a/llvm/test/tools/llvm-profgen/missing-dwarf.test
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -3,17 +3,16 @@
; RUN: cd %t
; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof
-; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe
; Test --load-function-from-symbol=0
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function
; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
; Test --load-function-from-symbol=1
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB
; CHECK-LOAD-SYMTAB: main:2:1
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index f601fc133df1c..c1c0ea5fe6b3e 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -832,8 +832,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
// Load binary functions from symbol table when Debug info is incomplete
- const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
- ".cold", ".warm", nullptr};
+ const SmallVector<StringRef> Suffixes(
+ {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
>From 5600e83a5f36ce7fe754f7a71a764d0cf35af15f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 29 Oct 2025 17:21:49 -0700
Subject: [PATCH 11/24] Clean up logging and comments
---
.../tools/llvm-profgen/missing-dwarf.test | 4 +-
llvm/tools/llvm-profgen/PerfReader.cpp | 18 +++++++-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 33 ---------------
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 41 ++++++++++++-------
llvm/tools/llvm-profgen/ProfiledBinary.h | 1 +
5 files changed, 45 insertions(+), 52 deletions(-)
diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
index 0fc0d660133f1..b96ae9018dae1 100644
--- a/llvm/test/tools/llvm-profgen/missing-dwarf.test
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -7,9 +7,7 @@
; Test --load-function-from-symbol=0
; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
-; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function
-; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
-; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB: warning: Loading of DWARF info completed, but no binary functions have been retrieved.
; Test --load-function-from-symbol=1
; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e9f7b666c95c7..1c4b79554e1a5 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1274,6 +1274,8 @@ void PerfScriptReader::warnInvalidRange() {
const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
"likely due to profile and binary mismatch.";
+ const char *DanglingRangeMsg = "Range does not belong to any functions, "
+ "likely from PLT, .init or .fini section.";
const char *RangeCrossFuncMsg =
"Fall through range should not cross function boundaries, likely due to "
"profile and binary mismatch.";
@@ -1281,6 +1283,8 @@ void PerfScriptReader::warnInvalidRange() {
uint64_t TotalRangeNum = 0;
uint64_t InstNotBoundary = 0;
+ uint64_t UnmatchedRange = 0;
+ uint64_t RecoveredRange = 0;
uint64_t RangeCrossFunc = 0;
uint64_t BogusRange = 0;
@@ -1300,8 +1304,14 @@ void PerfScriptReader::warnInvalidRange() {
}
auto *FRange = Binary->findFuncRange(StartAddress);
- if (!FRange)
+ if (!FRange) {
+ UnmatchedRange += I.second;
+ WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
continue;
+ }
+
+ if (FRange->Func->FromSymtab)
+ RecoveredRange += I.second;
if (EndAddress >= FRange->EndAddress) {
RangeCrossFunc += I.second;
@@ -1319,6 +1329,12 @@ void PerfScriptReader::warnInvalidRange() {
emitWarningSummary(
InstNotBoundary, TotalRangeNum,
"of samples are from ranges that are not on instruction boundary.");
+ emitWarningSummary(
+ UnmatchedRange, TotalRangeNum,
+ "of samples are from ranges that do not belong to any functions.");
+ emitWarningSummary(
+ RecoveredRange, TotalRangeNum,
+ "of samples are from ranges that belong to functions recovered from symbol table.");
emitWarningSummary(
RangeCrossFunc, TotalRangeNum,
"of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 2f6f50912fbcf..3b875c5de3c09 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,62 +449,29 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
// Go through all the stacks, ranges and branches in sample counters, use
// the start of the range to look up the function it belongs and record the
// function.
- uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
- uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
for (const auto &CI : *SampleCounters) {
if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
for (auto StackAddr : CtxKey->Context) {
- uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
- TotalStkAddr += Inc;
if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
ProfiledFunctions.insert(FRange->Func);
- else
- ErrStkAddr += Inc;
}
}
for (auto Item : CI.second.RangeCounter) {
uint64_t StartAddress = Item.first.first;
- uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0;
- TotalFuncRange += Inc;
if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
ProfiledFunctions.insert(FRange->Func);
- else
- ErrFuncRange += Inc;
}
for (auto Item : CI.second.BranchCounter) {
uint64_t SourceAddress = Item.first.first;
uint64_t TargetAddress = Item.first.second;
- uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0;
- uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0;
- TotalSrc += SrcInc;
if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
ProfiledFunctions.insert(FRange->Func);
- else
- ErrSrc += SrcInc;
- TotalTgt += TgtInc;
if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
ProfiledFunctions.insert(FRange->Func);
- else
- ErrTgt += TgtInc;
}
}
-
- if (ErrStkAddr)
- emitWarningSummary(
- ErrStkAddr, TotalStkAddr,
- "of stack address samples do not belong to any function");
- if (ErrFuncRange)
- emitWarningSummary(
- ErrFuncRange, TotalFuncRange,
- "of function range samples do not belong to any function");
- if (ErrSrc)
- emitWarningSummary(ErrSrc, TotalSrc,
- "of LBR source samples do not belong to any function");
- if (ErrTgt)
- emitWarningSummary(ErrTgt, TotalTgt,
- "of LBR target samples do not belong to any function");
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index c1c0ea5fe6b3e..e65bafa9ac4ca 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -831,13 +831,21 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
- // Load binary functions from symbol table when Debug info is incomplete
- const SmallVector<StringRef> Suffixes(
- {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+ // Load binary functions from symbol table when Debug info is incomplete.
+ // Strip the internal suffixes which are not reflected in the DWARF info.
+ const SmallVector<StringRef, 6> Suffixes(
+ {
+ // Internal suffixes from CoroSplit pass
+ ".cleanup", ".destroy", ".resume",
+ // Internal suffixes from Bolt
+ ".cold", ".warm",
+ // Compiler internal
+ ".llvm.",
+ });
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
- const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+ const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
uint64_t Size = 0;
if (isa<ELFObjectFileBase>(Symbol.getObject())) {
@@ -855,25 +863,26 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
auto &Func = Ret.first->second;
if (Ret.second) {
Func.FuncName = Ret.first->first;
+ Func.FromSymtab = true;
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
}
- if (auto Range = findFuncRange(Addr)) {
- if (Ret.second && ShowDetailedWarning)
+ if (auto Range = findFuncRange(StartAddr)) {
+ if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
WithColor::warning()
- << "Symbol " << Name << " start address "
- << format("%8" PRIx64, Addr) << " already exists in DWARF at "
- << format("%8" PRIx64, Range->StartAddress) << " in function "
- << Range->getFuncName() << "\n";
+ << "Conflicting symbol " << Name << " already exists in DWARF as "
+ << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr)
+ << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to "
+ << format("%8" PRIx64, Range->EndAddress) << "\n";
} else {
// Store/Update Function Range from SymTab
- Func.Ranges.emplace_back(Addr, Addr + Size);
+ Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
- auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+ auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
FuncRange &FRange = R.first->second;
FRange.Func = &Func;
- FRange.StartAddress = Addr;
- FRange.EndAddress = Addr + Size;
+ FRange.StartAddress = StartAddr;
+ FRange.EndAddress = StartAddr + Size;
}
}
}
@@ -902,8 +911,10 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
// BinaryFunction indexed by the name.
auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
auto &Func = Ret.first->second;
- if (Ret.second)
+ if (Ret.second) {
Func.FuncName = Ret.first->first;
+ Func.FromSymtab = false;
+ }
for (const auto &Range : Ranges) {
uint64_t StartAddress = Range.LowPC;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index e73ffd3143e3d..753fcc935716b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,6 +76,7 @@ struct BinaryFunction {
StringRef FuncName;
// End of range is an exclusive bound.
RangesTy Ranges;
+ bool FromSymtab;
uint64_t getFuncSize() {
uint64_t Sum = 0;
>From 0df504e8a998902821fe40db4b4edcc6ccb5259e Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 30 Oct 2025 22:47:56 -0700
Subject: [PATCH 12/24] Mark function FromSymtab too if new range is found
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e65bafa9ac4ca..b1e32757c31a9 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -840,7 +840,7 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
// Internal suffixes from Bolt
".cold", ".warm",
// Compiler internal
- ".llvm.",
+ ".llvm."
});
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
@@ -877,6 +877,7 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
} else {
// Store/Update Function Range from SymTab
Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
+ Func.FromSymtab = true;
auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
FuncRange &FRange = R.first->second;
>From 644fce908183f8069582bd127d7499e4a8e031fa Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 30 Oct 2025 23:05:13 -0700
Subject: [PATCH 13/24] Formatting
---
llvm/tools/llvm-profgen/PerfReader.cpp | 6 +++---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 20 ++++++++++----------
2 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 1c4b79554e1a5..a8a9c6eda85fd 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1332,9 +1332,9 @@ void PerfScriptReader::warnInvalidRange() {
emitWarningSummary(
UnmatchedRange, TotalRangeNum,
"of samples are from ranges that do not belong to any functions.");
- emitWarningSummary(
- RecoveredRange, TotalRangeNum,
- "of samples are from ranges that belong to functions recovered from symbol table.");
+ emitWarningSummary(RecoveredRange, TotalRangeNum,
+ "of samples are from ranges that belong to functions "
+ "recovered from symbol table.");
emitWarningSummary(
RangeCrossFunc, TotalRangeNum,
"of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index b1e32757c31a9..9ae10c1faaa0c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -834,14 +834,12 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
// Load binary functions from symbol table when Debug info is incomplete.
// Strip the internal suffixes which are not reflected in the DWARF info.
const SmallVector<StringRef, 6> Suffixes(
- {
- // Internal suffixes from CoroSplit pass
- ".cleanup", ".destroy", ".resume",
- // Internal suffixes from Bolt
- ".cold", ".warm",
- // Compiler internal
- ".llvm."
- });
+ {// Internal suffixes from CoroSplit pass
+ ".cleanup", ".destroy", ".resume",
+ // Internal suffixes from Bolt
+ ".cold", ".warm",
+ // Compiler internal
+ ".llvm."});
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -871,8 +869,10 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
WithColor::warning()
<< "Conflicting symbol " << Name << " already exists in DWARF as "
- << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr)
- << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to "
+ << Range->getFuncName() << " at address "
+ << format("%8" PRIx64, StartAddr)
+ << ". The DWARF indicates a range from "
+ << format("%8" PRIx64, Range->StartAddress) << " to "
<< format("%8" PRIx64, Range->EndAddress) << "\n";
} else {
// Store/Update Function Range from SymTab
>From 9188eff74c1fcfe4a21ff41ef92d8df4a8b4b1f2 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 31 Oct 2025 19:26:25 -0700
Subject: [PATCH 14/24] Fix suffix stripping and refactor the function checks
---
llvm/include/llvm/ProfileData/SampleProf.h | 2 +-
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 53 +++++++++++-----------
llvm/tools/llvm-profgen/ProfiledBinary.h | 4 +-
3 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index dd0495f29e8c3..2bf3312446443 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1235,7 +1235,7 @@ class FunctionSamples {
if (It == StringRef::npos)
continue;
auto Dit = Cand.rfind('.');
- if (Dit == It + Suffix.size() - 1)
+ if (Dit == It || Dit == It + Suffix.size() - 1)
Cand = Cand.substr(0, It);
}
return Cand;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 9ae10c1faaa0c..215a9463aad33 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -265,7 +265,7 @@ void ProfiledBinary::load() {
decodePseudoProbe(Obj);
if (LoadFunctionFromSymbol && UsePseudoProbes)
- populateSymbolsFromBinary(Obj);
+ loadSymbolsFromSymtab(Obj);
// Disassemble the text sections.
disassemble(Obj);
@@ -830,16 +830,16 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
}
}
-void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
// Load binary functions from symbol table when Debug info is incomplete.
// Strip the internal suffixes which are not reflected in the DWARF info.
- const SmallVector<StringRef, 6> Suffixes(
+ const SmallVector<StringRef, 10> Suffixes(
{// Internal suffixes from CoroSplit pass
".cleanup", ".destroy", ".resume",
// Internal suffixes from Bolt
".cold", ".warm",
- // Compiler internal
- ".llvm."});
+ // Compiler/LTO internal
+ ".llvm.", ".part.", ".isra.", ".constprop.", ".lto_priv."});
StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -857,33 +857,34 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
- auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
- auto &Func = Ret.first->second;
- if (Ret.second) {
- Func.FuncName = Ret.first->first;
- Func.FromSymtab = true;
- HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
- }
+ auto Range = findFuncRange(StartAddr);
+ if (!Range || Range->StartAddress != StartAddr) {
+ // Function from symbol table not found previously in DWARF, store ranges.
+ auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+ auto &Func = Ret.first->second;
+ if (Ret.second) {
+ Func.FuncName = Ret.first->first;
+ HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+ }
- if (auto Range = findFuncRange(StartAddr)) {
- if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
- WithColor::warning()
- << "Conflicting symbol " << Name << " already exists in DWARF as "
- << Range->getFuncName() << " at address "
- << format("%8" PRIx64, StartAddr)
- << ". The DWARF indicates a range from "
- << format("%8" PRIx64, Range->StartAddress) << " to "
- << format("%8" PRIx64, Range->EndAddress) << "\n";
- } else {
- // Store/Update Function Range from SymTab
- Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
Func.FromSymtab = true;
+ Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
FuncRange &FRange = R.first->second;
+
FRange.Func = &Func;
FRange.StartAddress = StartAddr;
FRange.EndAddress = StartAddr + Size;
+
+ } else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
+ // Function already found from DWARF, check consistency between symbol
+ // table and DWARF.
+ WithColor::warning() << "Conflicting name for symbol" << Name
+ << " at address " << format("%8" PRIx64, StartAddr)
+ << ", but the DWARF symbol " << Range->getFuncName()
+ << " indicates a starting address at "
+ << format("%8" PRIx64, Range->StartAddress) << "\n";
}
}
}
@@ -912,10 +913,8 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
// BinaryFunction indexed by the name.
auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
auto &Func = Ret.first->second;
- if (Ret.second) {
+ if (Ret.second)
Func.FuncName = Ret.first->first;
- Func.FromSymtab = false;
- }
for (const auto &Range : Ranges) {
uint64_t StartAddress = Range.LowPC;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 753fcc935716b..50b2caaa0a5e3 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,7 +76,7 @@ struct BinaryFunction {
StringRef FuncName;
// End of range is an exclusive bound.
RangesTy Ranges;
- bool FromSymtab;
+ bool FromSymtab = false;
uint64_t getFuncSize() {
uint64_t Sum = 0;
@@ -358,7 +358,7 @@ class ProfiledBinary {
void populateSymbolAddressList(const object::ObjectFile *O);
// Load functions from its symbol table (when DWARF info is missing).
- void populateSymbolsFromBinary(const object::ObjectFile *O);
+ void loadSymbolsFromSymtab(const object::ObjectFile *O);
// A function may be spilt into multiple non-continuous address ranges. We use
// this to set whether start a function range is the real entry of the
>From b6ae0ba636db8ec09681bc05bd19d4cd83ed4acd Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Tue, 11 Nov 2025 23:55:17 -0800
Subject: [PATCH 15/24] Fixup corrupted DWARF function names using symbol table
info
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 65 ++++++++++++++++++----
1 file changed, 55 insertions(+), 10 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 215a9463aad33..5b241ba5d9cfc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -854,11 +854,12 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
if (Size == 0 || Type != SymbolRef::ST_Function)
continue;
+ const uint64_t EndAddr = StartAddr + Size;
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
auto Range = findFuncRange(StartAddr);
- if (!Range || Range->StartAddress != StartAddr) {
+ if (!Range) {
// Function from symbol table not found previously in DWARF, store ranges.
auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
auto &Func = Ret.first->second;
@@ -868,23 +869,67 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
}
Func.FromSymtab = true;
- Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
+ Func.Ranges.emplace_back(StartAddr, EndAddr);
auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
FuncRange &FRange = R.first->second;
FRange.Func = &Func;
FRange.StartAddress = StartAddr;
- FRange.EndAddress = StartAddr + Size;
+ FRange.EndAddress = EndAddr;
- } else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
- // Function already found from DWARF, check consistency between symbol
- // table and DWARF.
- WithColor::warning() << "Conflicting name for symbol" << Name
- << " at address " << format("%8" PRIx64, StartAddr)
+ } else if (SymName != Range->getFuncName()) {
+ // Function range already found from DWARF, but the symbol name from
+ // symbol table is inconsistent with debug info.
+ if (ShowDetailedWarning)
+ WithColor::warning()
+ << "Conflicting name for symbol " << Name << " with range ("
+ << format("%8" PRIx64, StartAddr) << ", "
+ << format("%8" PRIx64, EndAddr) << ")"
+ << ", but the DWARF symbol " << Range->getFuncName()
+ << " indicates an overlapping range ("
+ << format("%8" PRIx64, Range->StartAddress) << ", "
+ << format("%8" PRIx64, Range->EndAddress) << ")\n";
+
+ assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
+ "Mismatched function range");
+
+ auto ErrSym = BinaryFunctions.find(Range->getFuncName().str());
+ auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+ auto &Func = Ret.first->second;
+
+ // Symbol table may contain multiple symbol names of the same starting
+ // address. Only need to pick one from these.
+ if (!Ret.second)
+ continue;
+
+ Func.FuncName = Ret.first->first;
+ Func.Ranges = ErrSym->second.Ranges;
+ Func.FromSymtab = true;
+
+ HashBinaryFunctions.erase(MD5Hash(Range->getFuncName()));
+ BinaryFunctions.erase(ErrSym);
+
+ HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+ Range->Func = &Func;
+ for (auto [RangeStart, _] : Func.Ranges) {
+ if (auto FRange = findFuncRangeForStartAddr(RangeStart)) {
+ assert(FRange && "Cannot find function range");
+ FRange->Func = &Func;
+ }
+ }
+ } else if (StartAddr != Range->StartAddress &&
+ EndAddr != Range->EndAddress) {
+ // Function already found in DWARF, but the address range from symbol
+ // table conflicts/overlaps with the debug info.
+ WithColor::warning() << "Conflicting range for symbol " << Name
+ << " with range (" << format("%8" PRIx64, StartAddr)
+ << ", " << format("%8" PRIx64, EndAddr) << ")"
<< ", but the DWARF symbol " << Range->getFuncName()
- << " indicates a starting address at "
- << format("%8" PRIx64, Range->StartAddress) << "\n";
+ << " indicates another range ("
+ << format("%8" PRIx64, Range->StartAddress) << ", "
+ << format("%8" PRIx64, Range->EndAddress) << ")\n";
+ llvm_unreachable("invalid function range");
}
}
}
>From b1cbdd08bd67370505f38f2bb666510b2cb014b2 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 14 Nov 2025 14:45:10 -0800
Subject: [PATCH 16/24] Fixup overwritten DWARF symbol name when decoding
pseudo probe
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 13 ++++++++++++-
llvm/tools/llvm-profgen/ProfiledBinary.h | 4 ++++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 5b241ba5d9cfc..52cb477848b68 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -828,6 +828,14 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
SymbolStartAddrs[GUID] = Addr;
StartAddrToSymMap.emplace(Addr, GUID);
}
+
+ // Load DWARF name too if they are overwritten by the symbol table
+ for (auto [OldGUID, Func] : OverriddenBinaryFunctions) {
+ uint64_t GUID = Function::getGUIDAssumingExternalLinkage(Func->FuncName);
+ uint64_t Addr = SymbolStartAddrs[GUID];
+ SymbolStartAddrs[OldGUID] = Addr;
+ StartAddrToSymMap.emplace(Addr, OldGUID);
+ }
}
void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
@@ -903,13 +911,16 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
if (!Ret.second)
continue;
+ uint64_t OldGUID = MD5Hash(Range->getFuncName());
+
Func.FuncName = Ret.first->first;
Func.Ranges = ErrSym->second.Ranges;
Func.FromSymtab = true;
- HashBinaryFunctions.erase(MD5Hash(Range->getFuncName()));
+ HashBinaryFunctions.erase(OldGUID);
BinaryFunctions.erase(ErrSym);
+ OverriddenBinaryFunctions[OldGUID] = &Func;
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
Range->Func = &Func;
for (auto [RangeStart, _] : Func.Ranges) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 50b2caaa0a5e3..8858300678343 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -232,6 +232,10 @@ class ProfiledBinary {
// GUID to symbol start address map
DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
+ // GUID mapping of the overridden DWARF symbol names to the binary functions
+ // with the symbol table names
+ std::unordered_map<uint64_t, BinaryFunction *> OverriddenBinaryFunctions;
+
// These maps are for temporary use of warning diagnosis.
DenseSet<int64_t> AddrsWithMultipleSymbols;
DenseSet<std::pair<uint64_t, uint64_t>> AddrsWithInvalidInstruction;
>From 8927a273ee063fe6446e0a6e597aebb51f895352 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Sat, 15 Nov 2025 22:14:21 -0800
Subject: [PATCH 17/24] Fixup GuidFilter && Pseudo probe callee mismatch
---
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 14 ++++++++++++--
llvm/tools/llvm-profgen/ProfileGenerator.h | 2 +-
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 +++++++-
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..075250c4b1ba8 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -723,7 +723,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
}
StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName) {
// Get the function range by branch target if it's a call branch.
auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
@@ -732,6 +732,16 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
if (!FRange || !FRange->IsFuncEntry)
return StringRef();
+ if (RestoreSymbolName && FRange->Func->FromSymtab) {
+ const AddressProbesMap &Address2ProbesMap =
+ Binary->getAddress2ProbesMap();
+ for (const MCDecodedPseudoProbe &Probe :
+ Address2ProbesMap.find(TargetAddress)) {
+ if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
+ return FunctionSamples::getCanonicalFnName(ProbeDesc->FuncName);
+ }
+ }
+
return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
}
@@ -1352,7 +1362,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
getFunctionProfileForLeafProbe(CtxKey, CallProbe);
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
FunctionProfile.addTotalSamples(Count);
- StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
+ StringRef CalleeName = getCalleeNameForAddress(TargetAddress, true);
if (CalleeName.size() == 0)
continue;
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index a4b738016ec3a..3cf13e2618460 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,7 +109,7 @@ class ProfileGeneratorBase {
bool filterAmbiguousProfile(FunctionSamples &FS);
- StringRef getCalleeNameForAddress(uint64_t TargetAddress);
+ StringRef getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName = false);
void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 52cb477848b68..893ad8607a7ec 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -473,8 +473,12 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
for (auto &Range : F->Ranges) {
auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
- for (const auto &[StartAddr, Func] : make_range(GUIDs))
+ for (const auto &[StartAddr, Func] : make_range(GUIDs)) {
FuncStartAddresses[Func] = StartAddr;
+ // Function name may be changed when symbol table is loaded. Adding
+ // back the original GUID if possible
+ GuidFilter.insert(Func);
+ }
}
}
}
@@ -865,9 +869,11 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
const uint64_t EndAddr = StartAddr + Size;
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
+ assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress());
auto Range = findFuncRange(StartAddr);
if (!Range) {
+ assert(findFuncRange(EndAddr - 1) == nullptr);
// Function from symbol table not found previously in DWARF, store ranges.
auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
auto &Func = Ret.first->second;
>From 108bc08571d573dddaf6b79f084a5fdd7cc8040a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 14:15:52 -0800
Subject: [PATCH 18/24] format
---
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 6 +++---
llvm/tools/llvm-profgen/ProfileGenerator.h | 3 ++-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 075250c4b1ba8..efc6b6a0595a0 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -723,7 +723,8 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
}
StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
+ bool RestoreSymbolName) {
// Get the function range by branch target if it's a call branch.
auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
@@ -733,8 +734,7 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool Resto
return StringRef();
if (RestoreSymbolName && FRange->Func->FromSymtab) {
- const AddressProbesMap &Address2ProbesMap =
- Binary->getAddress2ProbesMap();
+ const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
for (const MCDecodedPseudoProbe &Probe :
Address2ProbesMap.find(TargetAddress)) {
if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 3cf13e2618460..9c347bb78dae1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,7 +109,8 @@ class ProfileGeneratorBase {
bool filterAmbiguousProfile(FunctionSamples &FS);
- StringRef getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName = false);
+ StringRef getCalleeNameForAddress(uint64_t TargetAddress,
+ bool RestoreSymbolName = false);
void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
>From 6e3a5aca55d1cd6c93c7e42988d4ee0201355f89 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 15:27:46 -0800
Subject: [PATCH 19/24] Clean up fixup logic in pseudo probe
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 21 +++++++--------------
llvm/tools/llvm-profgen/ProfiledBinary.h | 5 +++--
2 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 893ad8607a7ec..113a7ab818c24 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -471,14 +471,15 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
} else {
for (auto *F : ProfiledFunctions) {
GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
+ // Function name may be changed when symbol table is loaded. Adding
+ // back the original GUID if possible
+ auto OldGuid = OverriddenBinaryFunctions.find(F);
+ if (OldGuid != OverriddenBinaryFunctions.end())
+ GuidFilter.insert(OldGuid->second);
for (auto &Range : F->Ranges) {
auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
- for (const auto &[StartAddr, Func] : make_range(GUIDs)) {
+ for (const auto &[StartAddr, Func] : make_range(GUIDs))
FuncStartAddresses[Func] = StartAddr;
- // Function name may be changed when symbol table is loaded. Adding
- // back the original GUID if possible
- GuidFilter.insert(Func);
- }
}
}
}
@@ -832,14 +833,6 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
SymbolStartAddrs[GUID] = Addr;
StartAddrToSymMap.emplace(Addr, GUID);
}
-
- // Load DWARF name too if they are overwritten by the symbol table
- for (auto [OldGUID, Func] : OverriddenBinaryFunctions) {
- uint64_t GUID = Function::getGUIDAssumingExternalLinkage(Func->FuncName);
- uint64_t Addr = SymbolStartAddrs[GUID];
- SymbolStartAddrs[OldGUID] = Addr;
- StartAddrToSymMap.emplace(Addr, OldGUID);
- }
}
void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
@@ -926,7 +919,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
HashBinaryFunctions.erase(OldGUID);
BinaryFunctions.erase(ErrSym);
- OverriddenBinaryFunctions[OldGUID] = &Func;
+ OverriddenBinaryFunctions[&Func] = OldGUID;
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
Range->Func = &Func;
for (auto [RangeStart, _] : Func.Ranges) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 8858300678343..4268bd74e41fc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -232,9 +232,10 @@ class ProfiledBinary {
// GUID to symbol start address map
DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
- // GUID mapping of the overridden DWARF symbol names to the binary functions
+ // GUID mapping of the overridden DWARF symbol names by the binary functions
// with the symbol table names
- std::unordered_map<uint64_t, BinaryFunction *> OverriddenBinaryFunctions;
+ std::unordered_map<const BinaryFunction *, uint64_t>
+ OverriddenBinaryFunctions;
// These maps are for temporary use of warning diagnosis.
DenseSet<int64_t> AddrsWithMultipleSymbols;
>From 3d8ad533c10b1b5a39479f3e23c990f11886c3cc Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 17:15:38 -0800
Subject: [PATCH 20/24] Further cleanup and add more GuidFilters
---
llvm/tools/llvm-profgen/PerfReader.cpp | 2 +-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 2 +-
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 44 +++++---------------
llvm/tools/llvm-profgen/ProfiledBinary.h | 10 ++---
4 files changed, 18 insertions(+), 40 deletions(-)
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index a8a9c6eda85fd..b827ebfe261f3 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1310,7 +1310,7 @@ void PerfScriptReader::warnInvalidRange() {
continue;
}
- if (FRange->Func->FromSymtab)
+ if (FRange->Func->HasSymtabName)
RecoveredRange += I.second;
if (EndAddress >= FRange->EndAddress) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index efc6b6a0595a0..4a5d3d4aff3cb 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -733,7 +733,7 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
if (!FRange || !FRange->IsFuncEntry)
return StringRef();
- if (RestoreSymbolName && FRange->Func->FromSymtab) {
+ if (RestoreSymbolName && FRange->Func->HasSymtabName) {
const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
for (const MCDecodedPseudoProbe &Probe :
Address2ProbesMap.find(TargetAddress)) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 113a7ab818c24..7d5c3cc2ff148 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -471,11 +471,11 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
} else {
for (auto *F : ProfiledFunctions) {
GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
- // Function name may be changed when symbol table is loaded. Adding
- // back the original GUID if possible
- auto OldGuid = OverriddenBinaryFunctions.find(F);
- if (OldGuid != OverriddenBinaryFunctions.end())
- GuidFilter.insert(OldGuid->second);
+ // Function may have different names in symbol table. Adding
+ // back all the GUIDs if possible
+ auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F);
+ for (const auto &[_, Func] : make_range(AltGUIDs))
+ GuidFilter.insert(Func);
for (auto &Range : F->Ranges) {
auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
for (const auto &[StartAddr, Func] : make_range(GUIDs))
@@ -875,7 +875,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
}
- Func.FromSymtab = true;
+ Func.HasSymtabName = true;
Func.Ranges.emplace_back(StartAddr, EndAddr);
auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
@@ -887,7 +887,8 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
} else if (SymName != Range->getFuncName()) {
// Function range already found from DWARF, but the symbol name from
- // symbol table is inconsistent with debug info.
+ // symbol table is inconsistent with debug info. Log this discrepaency and
+ // the alternative function GUID.
if (ShowDetailedWarning)
WithColor::warning()
<< "Conflicting name for symbol " << Name << " with range ("
@@ -901,33 +902,10 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
"Mismatched function range");
- auto ErrSym = BinaryFunctions.find(Range->getFuncName().str());
- auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
- auto &Func = Ret.first->second;
-
- // Symbol table may contain multiple symbol names of the same starting
- // address. Only need to pick one from these.
- if (!Ret.second)
- continue;
+ Range->Func->HasSymtabName = true;
+ AlternativeFunctionGUIDs.emplace(Range->Func,
+ MD5Hash(StringRef(SymName)));
- uint64_t OldGUID = MD5Hash(Range->getFuncName());
-
- Func.FuncName = Ret.first->first;
- Func.Ranges = ErrSym->second.Ranges;
- Func.FromSymtab = true;
-
- HashBinaryFunctions.erase(OldGUID);
- BinaryFunctions.erase(ErrSym);
-
- OverriddenBinaryFunctions[&Func] = OldGUID;
- HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
- Range->Func = &Func;
- for (auto [RangeStart, _] : Func.Ranges) {
- if (auto FRange = findFuncRangeForStartAddr(RangeStart)) {
- assert(FRange && "Cannot find function range");
- FRange->Func = &Func;
- }
- }
} else if (StartAddr != Range->StartAddress &&
EndAddr != Range->EndAddress) {
// Function already found in DWARF, but the address range from symbol
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 4268bd74e41fc..afd2fb482b0cd 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,7 +76,7 @@ struct BinaryFunction {
StringRef FuncName;
// End of range is an exclusive bound.
RangesTy Ranges;
- bool FromSymtab = false;
+ bool HasSymtabName = false;
uint64_t getFuncSize() {
uint64_t Sum = 0;
@@ -232,10 +232,10 @@ class ProfiledBinary {
// GUID to symbol start address map
DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
- // GUID mapping of the overridden DWARF symbol names by the binary functions
- // with the symbol table names
- std::unordered_map<const BinaryFunction *, uint64_t>
- OverriddenBinaryFunctions;
+ // Binary function to GUID mapping that stores the alternative names in symbol
+ // table, despite the original name from DWARF info
+ std::unordered_multimap<const BinaryFunction *, uint64_t>
+ AlternativeFunctionGUIDs;
// These maps are for temporary use of warning diagnosis.
DenseSet<int64_t> AddrsWithMultipleSymbols;
>From 1344cddecb9798f308e8df9491b168ec8fc99939 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 20 Nov 2025 23:19:10 -0800
Subject: [PATCH 21/24] Infer callee name with pseudo probe names
---
llvm/include/llvm/MC/MCPseudoProbe.h | 1 +
llvm/tools/llvm-profgen/Options.h | 1 +
llvm/tools/llvm-profgen/PerfReader.cpp | 2 +-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 23 +++---
llvm/tools/llvm-profgen/ProfileGenerator.h | 3 +-
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 74 ++++++++++++++++----
llvm/tools/llvm-profgen/ProfiledBinary.h | 20 +++++-
7 files changed, 94 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index ac28e45891df2..fc722378b586a 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -328,6 +328,7 @@ class MCDecodedPseudoProbeInlineTree
// Return false if it's a dummy inline site
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
+ bool isTopLevelFunc() const { return !isRoot() && Parent->isRoot(); }
InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
Probes = ProbesRef.data();
diff --git a/llvm/tools/llvm-profgen/Options.h b/llvm/tools/llvm-profgen/Options.h
index f94cf9118c06a..b2c941fb01945 100644
--- a/llvm/tools/llvm-profgen/Options.h
+++ b/llvm/tools/llvm-profgen/Options.h
@@ -22,6 +22,7 @@ extern cl::opt<bool> ShowDetailedWarning;
extern cl::opt<bool> InferMissingFrames;
extern cl::opt<bool> EnableCSPreInliner;
extern cl::opt<bool> UseContextCostForPreInliner;
+extern cl::opt<bool> LoadFunctionFromSymbol;
} // end namespace llvm
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index b827ebfe261f3..1dc59321fd91f 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1310,7 +1310,7 @@ void PerfScriptReader::warnInvalidRange() {
continue;
}
- if (FRange->Func->HasSymtabName)
+ if (FRange->Func->NameStatus != DwarfNameStatus::Matched)
RecoveredRange += I.second;
if (EndAddress >= FRange->EndAddress) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 4a5d3d4aff3cb..e39ab6e4ba979 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -503,8 +503,11 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
- if (Binary->usePseudoProbes())
+ if (Binary->usePseudoProbes()) {
Binary->decodePseudoProbe();
+ if (LoadFunctionFromSymbol)
+ Binary->loadSymbolsFromPseudoProbe();
+ }
if (SampleCounters) {
if (Binary->usePseudoProbes()) {
@@ -723,8 +726,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
}
StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
- bool RestoreSymbolName) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
// Get the function range by branch target if it's a call branch.
auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
@@ -733,14 +735,9 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
if (!FRange || !FRange->IsFuncEntry)
return StringRef();
- if (RestoreSymbolName && FRange->Func->HasSymtabName) {
- const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
- for (const MCDecodedPseudoProbe &Probe :
- Address2ProbesMap.find(TargetAddress)) {
- if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
- return FunctionSamples::getCanonicalFnName(ProbeDesc->FuncName);
- }
- }
+ auto FuncName = Binary->findPseudoProbeName(FRange->Func);
+ if (FuncName.size())
+ return FunctionSamples::getCanonicalFnName(FuncName);
return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
}
@@ -929,6 +926,8 @@ void CSProfileGenerator::generateProfile() {
Binary->decodePseudoProbe();
if (InferMissingFrames)
initializeMissingFrameInferrer();
+ if (LoadFunctionFromSymbol)
+ Binary->loadSymbolsFromPseudoProbe();
}
if (SampleCounters) {
@@ -1362,7 +1361,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
getFunctionProfileForLeafProbe(CtxKey, CallProbe);
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
FunctionProfile.addTotalSamples(Count);
- StringRef CalleeName = getCalleeNameForAddress(TargetAddress, true);
+ StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
if (CalleeName.size() == 0)
continue;
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 9c347bb78dae1..a4b738016ec3a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,8 +109,7 @@ class ProfileGeneratorBase {
bool filterAmbiguousProfile(FunctionSamples &FS);
- StringRef getCalleeNameForAddress(uint64_t TargetAddress,
- bool RestoreSymbolName = false);
+ StringRef getCalleeNameForAddress(uint64_t TargetAddress);
void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 7d5c3cc2ff148..2253706cf5736 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -37,6 +37,13 @@ cl::opt<bool> ShowSourceLocations("show-source-locations",
cl::desc("Print source locations."),
cl::cat(ProfGenCategory));
+cl::opt<bool>
+ LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
+ cl::desc("Gather additional binary function info "
+ "from symbols (e.g. .symtab) in case "
+ "dwarf info is incomplete."),
+ cl::cat(ProfGenCategory));
+
static cl::opt<bool>
ShowCanonicalFnName("show-canonical-fname",
cl::desc("Print canonical function name."),
@@ -65,13 +72,6 @@ static cl::list<std::string> DisassembleFunctions(
"names only. Only work with show-disassembly-only"),
cl::cat(ProfGenCategory));
-static cl::opt<bool>
- LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
- cl::desc("Gather additional binary function info "
- "from symbols (e.g. .symtab) in case "
- "dwarf info is incomplete."),
- cl::cat(ProfGenCategory));
-
static cl::opt<bool>
KernelBinary("kernel",
cl::desc("Generate the profile for Linux kernel binary."),
@@ -471,8 +471,10 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
} else {
for (auto *F : ProfiledFunctions) {
GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
- // Function may have different names in symbol table. Adding
- // back all the GUIDs if possible
+ // DWARF name might be broken when a DWARF32 .debug_str.dwo section
+ // exceeds 4GB. We expect symbol table to contain the correct function
+ // names which match the pseudo probe. Adding back all the GUIDs if
+ // possible.
auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F);
for (const auto &[_, Func] : make_range(AltGUIDs))
GuidFilter.insert(Func);
@@ -862,11 +864,13 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
const uint64_t EndAddr = StartAddr + Size;
const StringRef SymName =
FunctionSamples::getCanonicalFnName(Name, Suffixes);
- assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress());
+ assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress() &&
+ "Function range is invalid.");
auto Range = findFuncRange(StartAddr);
if (!Range) {
- assert(findFuncRange(EndAddr - 1) == nullptr);
+ assert(findFuncRange(EndAddr - 1) == nullptr &&
+ "Function range overlaps with existing functions.");
// Function from symbol table not found previously in DWARF, store ranges.
auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
auto &Func = Ret.first->second;
@@ -875,7 +879,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
}
- Func.HasSymtabName = true;
+ Func.NameStatus = DwarfNameStatus::Missing;
Func.Ranges.emplace_back(StartAddr, EndAddr);
auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
@@ -887,7 +891,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
} else if (SymName != Range->getFuncName()) {
// Function range already found from DWARF, but the symbol name from
- // symbol table is inconsistent with debug info. Log this discrepaency and
+ // symbol table is inconsistent with debug info. Log this discrepancy and
// the alternative function GUID.
if (ShowDetailedWarning)
WithColor::warning()
@@ -902,7 +906,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
"Mismatched function range");
- Range->Func->HasSymtabName = true;
+ Range->Func->NameStatus = DwarfNameStatus::Mismatch;
AlternativeFunctionGUIDs.emplace(Range->Func,
MD5Hash(StringRef(SymName)));
@@ -1136,6 +1140,48 @@ void ProfiledBinary::computeInlinedContextSizeForFunc(
}
}
+void ProfiledBinary::loadSymbolsFromPseudoProbe() {
+ if (!UsePseudoProbes)
+ return;
+
+ const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap();
+ for (auto &[Addr, Range] : StartAddrToFuncRangeMap) {
+ auto Func = Range.Func;
+ if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
+ continue;
+#ifndef NDEBUG
+ if (PseudoProbeNames.count(Func))
+ continue;
+#endif
+ const auto &Probe = Address2ProbesMap.find(Addr).begin();
+ if (Probe != Address2ProbesMap.end()) {
+ const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+ Probe->get().getInlineTreeNode();
+ while (!InlineTreeNode->isTopLevelFunc())
+ InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
+ InlineTreeNode->Parent);
+
+ auto TopLevelProbes = InlineTreeNode->getProbes();
+ auto TopProbe = TopLevelProbes.begin();
+ assert(TopProbe != TopLevelProbes.end() &&
+ TopProbe->getAddress() >= Addr &&
+ "Top level pseudo probe does not match function range");
+
+ const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
+ auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
+ assert((Ret.second || Ret.first->second == ProbeDesc->FuncName) &&
+ "Mismatched pseudo probe names");
+ }
+ }
+}
+
+StringRef ProfiledBinary::findPseudoProbeName(const BinaryFunction *Func) {
+ auto ProbeName = PseudoProbeNames.find(Func);
+ if (ProbeName == PseudoProbeNames.end())
+ return StringRef();
+ return ProbeName->second;
+}
+
void ProfiledBinary::inferMissingFrames(
const SmallVectorImpl<uint64_t> &Context,
SmallVectorImpl<uint64_t> &NewContext) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index afd2fb482b0cd..1a83f8221df11 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -72,11 +72,22 @@ enum SpecialFrameAddr {
using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
+enum DwarfNameStatus {
+ // Dwarf name matches with the symbol table (or symbol table just doesn't have
+ // this entry)
+ Matched = 0,
+ // Dwarf name is missing, but we fixed it with the name from symbol table
+ Missing = 1,
+ // The symbol table has a different name for this function. The GUIDs of
+ // the symbol-table names are logged in AlternativeFunctionGUIDs
+ Mismatch = 2,
+};
+
struct BinaryFunction {
StringRef FuncName;
// End of range is an exclusive bound.
RangesTy Ranges;
- bool HasSymtabName = false;
+ DwarfNameStatus NameStatus = DwarfNameStatus::Matched;
uint64_t getFuncSize() {
uint64_t Sum = 0;
@@ -237,6 +248,9 @@ class ProfiledBinary {
std::unordered_multimap<const BinaryFunction *, uint64_t>
AlternativeFunctionGUIDs;
+ // Mapping of profiled binary function to its pseudo probe name
+ std::unordered_map<const BinaryFunction *, StringRef> PseudoProbeNames;
+
// These maps are for temporary use of warning diagnosis.
DenseSet<int64_t> AddrsWithMultipleSymbols;
DenseSet<std::pair<uint64_t, uint64_t>> AddrsWithInvalidInstruction;
@@ -608,6 +622,10 @@ class ProfiledBinary {
void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
+ void loadSymbolsFromPseudoProbe();
+
+ StringRef findPseudoProbeName(const BinaryFunction *Func);
+
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
return ProbeDecoder.getCallProbeForAddr(Address);
}
>From 2978e21cbab14d0f5f1d62e1e481c8b4ce41597f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 21 Nov 2025 21:30:34 -0800
Subject: [PATCH 22/24] promote eligible entry point functions
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2253706cf5736..e2609ccd7df0b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -539,7 +539,9 @@ void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
// Set IsFuncEntry to ture if there is only one range in the function or the
// RangeSymName from ELF is equal to its DWARF-based function name.
if (FuncRange->Func->Ranges.size() == 1 ||
- (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
+ (!FuncRange->IsFuncEntry &&
+ (FuncRange->getFuncName() == RangeSymName ||
+ FuncRange->Func->NameStatus != DwarfNameStatus::Matched)))
FuncRange->IsFuncEntry = true;
}
>From eaab9dfd3c9baa3fe045a1c58934f03e7b15ed2a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 21 Nov 2025 22:14:39 -0800
Subject: [PATCH 23/24] Update pseudo probe search range && range checks
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e2609ccd7df0b..cc601b30bf342 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -1151,14 +1151,10 @@ void ProfiledBinary::loadSymbolsFromPseudoProbe() {
auto Func = Range.Func;
if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
continue;
-#ifndef NDEBUG
- if (PseudoProbeNames.count(Func))
- continue;
-#endif
- const auto &Probe = Address2ProbesMap.find(Addr).begin();
- if (Probe != Address2ProbesMap.end()) {
+ const auto &Probe = Address2ProbesMap.find(Addr, Range.EndAddress);
+ if (Probe.begin() != Probe.end()) {
const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
- Probe->get().getInlineTreeNode();
+ Probe.begin()->get().getInlineTreeNode();
while (!InlineTreeNode->isTopLevelFunc())
InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
InlineTreeNode->Parent);
>From c53e0ae47bda56620eb4876f1a82bd745ec406f9 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Sun, 23 Nov 2025 20:35:53 -0800
Subject: [PATCH 24/24] Iterate through ProfiledFunctions to reduce scope
---
llvm/tools/llvm-profgen/ProfiledBinary.cpp | 56 ++++++++++++++--------
1 file changed, 35 insertions(+), 21 deletions(-)
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index cc601b30bf342..6303eb1615854 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -1147,28 +1147,42 @@ void ProfiledBinary::loadSymbolsFromPseudoProbe() {
return;
const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap();
- for (auto &[Addr, Range] : StartAddrToFuncRangeMap) {
- auto Func = Range.Func;
- if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
+ for (auto *Func : ProfiledFunctions) {
+ if (Func->NameStatus != DwarfNameStatus::Mismatch)
continue;
- const auto &Probe = Address2ProbesMap.find(Addr, Range.EndAddress);
- if (Probe.begin() != Probe.end()) {
- const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
- Probe.begin()->get().getInlineTreeNode();
- while (!InlineTreeNode->isTopLevelFunc())
- InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
- InlineTreeNode->Parent);
-
- auto TopLevelProbes = InlineTreeNode->getProbes();
- auto TopProbe = TopLevelProbes.begin();
- assert(TopProbe != TopLevelProbes.end() &&
- TopProbe->getAddress() >= Addr &&
- "Top level pseudo probe does not match function range");
-
- const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
- auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
- assert((Ret.second || Ret.first->second == ProbeDesc->FuncName) &&
- "Mismatched pseudo probe names");
+ for (auto &[StartAddr, EndAddr] : Func->Ranges) {
+ auto Range = findFuncRangeForStartAddr(StartAddr);
+ if (!Range->IsFuncEntry)
+ continue;
+ const auto &Probe = Address2ProbesMap.find(StartAddr, EndAddr);
+ if (Probe.begin() != Probe.end()) {
+ const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+ Probe.begin()->get().getInlineTreeNode();
+ while (!InlineTreeNode->isTopLevelFunc())
+ InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
+ InlineTreeNode->Parent);
+
+ auto TopLevelProbes = InlineTreeNode->getProbes();
+ auto TopProbe = TopLevelProbes.begin();
+ assert(TopProbe != TopLevelProbes.end() &&
+ TopProbe->getAddress() >= StartAddr &&
+ TopProbe->getAddress() < EndAddr &&
+ "Top level pseudo probe does not match function range");
+
+ const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
+ auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
+ if (!Ret.second && Ret.first->second != ProbeDesc->FuncName &&
+ ShowDetailedWarning)
+ WithColor::warning()
+ << "Mismatched pseudo probe names in function " << Func->FuncName
+ << " at range: (" << format("%8" PRIx64, StartAddr) << ", "
+ << format("%8" PRIx64, EndAddr) << "). "
+ << "The previously found pseudo probe name is "
+ << Ret.first->second << " but it conflicts with name "
+ << ProbeDesc->FuncName
+ << ". This likely indicates a DWARF error that produces "
+ "conflicting symbols at the same starting address.\n";
+ }
}
}
}
More information about the llvm-commits
mailing list