[llvm] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete (PR #163654)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 23 20:41:48 PST 2025


https://github.com/HighW4y2H3ll updated https://github.com/llvm/llvm-project/pull/163654

>From f34ab2d0d5f767d46f31452a8231e56f67ed4a21 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 9 Oct 2025 12:07:13 -0700
Subject: [PATCH 01/24] [llvm-profgen] Loading binary functions from .symtab
 when DWARF info is incomplete

---
 llvm/include/llvm/ProfileData/SampleProf.h   | 10 ++++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 ++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   | 44 ++++++++++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.h     |  3 ++
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 3dd34aba2d716..4adbe13b6712b 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,12 +1214,18 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+    SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+    return getCanonicalFnName(FnName, KnownSuffixes, Attr);
+  }
+
+  static StringRef getCanonicalFnName(StringRef FnName,
+                                      const SmallVector<StringRef> &Suffixes,
+                                      StringRef Attr = "selected") {
     if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
     if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const auto &Suf : KnownSuffixes) {
+      for (const auto &Suf : Suffixes) {
         StringRef Suffix(Suf);
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..058b154fc5a57 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,29 +449,56 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   // Go through all the stacks, ranges and branches in sample counters, use
   // the start of the range to look up the function it belongs and record the
   // function.
+  uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
+  uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
   for (const auto &CI : *SampleCounters) {
     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
       for (auto StackAddr : CtxKey->Context) {
+        uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+        TotalStkAddr += inc;
         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
           ProfiledFunctions.insert(FRange->Func);
+        else
+          ErrStkAddr += inc;
       }
     }
 
     for (auto Item : CI.second.RangeCounter) {
       uint64_t StartAddress = Item.first.first;
+      uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
+      TotalFuncRange += inc;
       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrFuncRange += inc;
     }
 
     for (auto Item : CI.second.BranchCounter) {
       uint64_t SourceAddress = Item.first.first;
       uint64_t TargetAddress = Item.first.second;
+      uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
+      uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
+      TotalSrc += srcinc;
       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrSrc += srcinc;
+      TotalTgt += tgtinc;
       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrTgt += tgtinc;
     }
   }
+
+  if (ErrStkAddr)
+    WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+  if (ErrFuncRange)
+    WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+  if (ErrSrc)
+    WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+  if (ErrTgt)
+    WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
   return true;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 94728ce4abffe..2d9a13b97114c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,6 +257,8 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
+  populateSymbolsFromElf(Obj);
+
   // Disassemble the text sections.
   disassemble(Obj);
 
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
+void ProfiledBinary::populateSymbolsFromElf(
+    const ObjectFile *Obj) {
+  // Load binary functions from ELF symbol table when DWARF info is incomplete
+  StringRef FileName = Obj->getFileName();
+  for (const ELFSymbolRef Symbol : Obj->symbols()) {
+    const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+    const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    const uint64_t Size = Symbol.getSize();
+
+    if (Size == 0 || Type != SymbolRef::ST_Function)
+      continue;
+
+    SmallVector<StringRef> Suffixes(
+      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+    const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+    auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+    auto &Func = Ret.first->second;
+    if (Ret.second)
+      Func.FuncName = Ret.first->first;
+
+    if (auto Range = findFuncRange(Addr)) {
+      if (Ret.second && ShowDetailedWarning)
+        WithColor::warning()
+            << "Symbol " << Name << " start address "
+            << format("%8" PRIx64, Addr) << " already exists in DWARF at "
+            << format("%8" PRIx64, Range->StartAddress) << " in function "
+            << Range->getFuncName() << "\n";
+    } else {
+      // Store/Update Function Range from SymTab
+      Func.Ranges.emplace_back(Addr, Addr + Size);
+
+      auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+      FuncRange &FRange = R.first->second;
+      FRange.Func = &Func;
+      FRange.StartAddress = Addr;
+      FRange.EndAddress = Addr + Size;
+    }
+  }
+}
+
 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
   for (const auto &DieInfo : CompilationUnit.dies()) {
     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5a814b7dbd52d..238c27fbc4c9f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -356,6 +356,9 @@ class ProfiledBinary {
   // Create symbol to its start address mapping.
   void populateSymbolAddressList(const object::ObjectFile *O);
 
+  // Load functions from its symbol table (when DWARF info is missing).
+  void populateSymbolsFromElf(const object::ObjectFile *O);
+
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start a function range is the real entry of the
   // function and also set false to the non-function label.

>From 0fd352d28316691d97c245f80fe8205309c3b253 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 15 Oct 2025 16:25:56 -0700
Subject: [PATCH 02/24] formatting

---
 llvm/include/llvm/ProfileData/SampleProf.h   |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 13 +++++++++----
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   |  8 ++++----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 4adbe13b6712b..dadf718d0b904 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,7 +1214,7 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    SmallVector<StringRef> KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix});
+    SmallVector<StringRef> KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix});
     return getCanonicalFnName(FnName, KnownSuffixes, Attr);
   }
 
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 058b154fc5a57..0478d5568085a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -492,13 +492,18 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   }
 
   if (ErrStkAddr)
-    WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+    WithColor::warning() << "Cannot find Stack Address from DWARF Info: "
+                         << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
   if (ErrFuncRange)
-    WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n";
+    WithColor::warning() << "Cannot find Function Range from DWARF Info: "
+                         << ErrFuncRange << "/" << TotalFuncRange
+                         << " missing\n";
   if (ErrSrc)
-    WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n";
+    WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: "
+                         << ErrSrc << "/" << TotalSrc << " missing\n";
   if (ErrTgt)
-    WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n";
+    WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: "
+                         << ErrTgt << "/" << TotalTgt << " missing\n";
   return true;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2d9a13b97114c..aa385c0db50db 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -822,8 +822,7 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
-void ProfiledBinary::populateSymbolsFromElf(
-    const ObjectFile *Obj) {
+void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) {
   // Load binary functions from ELF symbol table when DWARF info is incomplete
   StringRef FileName = Obj->getFileName();
   for (const ELFSymbolRef Symbol : Obj->symbols()) {
@@ -836,8 +835,9 @@ void ProfiledBinary::populateSymbolsFromElf(
       continue;
 
     SmallVector<StringRef> Suffixes(
-      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
-    const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes);
+        {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+    const StringRef SymName =
+        FunctionSamples::getCanonicalFnName(Name, Suffixes);
 
     auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
     auto &Func = Ret.first->second;

>From c097d374402f11fe00d997495b10a834ff6a4d9e Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 16 Oct 2025 10:59:47 -0700
Subject: [PATCH 03/24] Fix branch target check when an instruction branches to
 itself. (i.e. jmp 0)

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index aa385c0db50db..2ceeba28f77a8 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -606,13 +606,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       // Record potential call targets for tail frame inference later-on.
       if (InferMissingFrames && FRange) {
         uint64_t Target = 0;
-        MIA->evaluateBranch(Inst, Address, Size, Target);
+        bool Err = MIA->evaluateBranch(Inst, Address, Size, Target);
         if (MCDesc.isCall()) {
           // Indirect call targets are unknown at this point. Recording the
           // unknown target (zero) for further LBR-based refinement.
           MissingContextInferrer->CallEdges[Address].insert(Target);
         } else if (MCDesc.isUnconditionalBranch()) {
-          assert(Target &&
+          assert(Err &&
                  "target should be known for unconditional direct branch");
           // Any inter-function unconditional jump is considered tail call at
           // this point. This is not 100% accurate and could further be

>From a19064d73c04e68757b2cf1323c78b40b649f75f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 16 Oct 2025 22:36:53 -0700
Subject: [PATCH 04/24] Making the API compatible with non-ELF binaries

---
 llvm/include/llvm/Object/ObjectFile.h      | 5 +++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++++----
 llvm/tools/llvm-profgen/ProfiledBinary.h   | 2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 289cc770e3466..6ceedd2d310f7 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -198,6 +198,7 @@ class SymbolRef : public BasicSymbolRef {
   /// Get the alignment of this symbol as the actual value (not log 2).
   uint32_t getAlignment() const;
   uint64_t getCommonSize() const;
+  uint64_t getSize() const;
   Expected<SymbolRef::Type> getType() const;
 
   /// Get section this symbol is defined in reference to. Result is
@@ -482,6 +483,10 @@ inline uint64_t SymbolRef::getCommonSize() const {
   return getObject()->getCommonSymbolSize(getRawDataRefImpl());
 }
 
+inline uint64_t SymbolRef::getSize() const {
+  return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl());
+}
+
 inline Expected<section_iterator> SymbolRef::getSection() const {
   return getObject()->getSymbolSection(getRawDataRefImpl());
 }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2ceeba28f77a8..c9561aa9cfb3c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,7 +257,7 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
-  populateSymbolsFromElf(Obj);
+  populateSymbolsFromBinary(Obj);
 
   // Disassemble the text sections.
   disassemble(Obj);
@@ -822,10 +822,10 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
-void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) {
-  // Load binary functions from ELF symbol table when DWARF info is incomplete
+void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+  // Load binary functions from symbol table when Debug info is incomplete
   StringRef FileName = Obj->getFileName();
-  for (const ELFSymbolRef Symbol : Obj->symbols()) {
+  for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 238c27fbc4c9f..e73ffd3143e3d 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -357,7 +357,7 @@ class ProfiledBinary {
   void populateSymbolAddressList(const object::ObjectFile *O);
 
   // Load functions from its symbol table (when DWARF info is missing).
-  void populateSymbolsFromElf(const object::ObjectFile *O);
+  void populateSymbolsFromBinary(const object::ObjectFile *O);
 
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start a function range is the real entry of the

>From e12e694c1b9e3563dd8351e225b7acec05e12d5a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 20 Oct 2025 09:40:57 -0700
Subject: [PATCH 05/24] Fix

---
 llvm/include/llvm/ProfileData/SampleProf.h   |  9 ++--
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 43 ++++++++++----------
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   |  4 +-
 3 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index dadf718d0b904..6de5884253017 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,19 +1214,18 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    SmallVector<StringRef> KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix});
+    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr};
     return getCanonicalFnName(FnName, KnownSuffixes, Attr);
   }
 
-  static StringRef getCanonicalFnName(StringRef FnName,
-                                      const SmallVector<StringRef> &Suffixes,
+  static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[],
                                       StringRef Attr = "selected") {
     if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
     if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const auto &Suf : Suffixes) {
-        StringRef Suffix(Suf);
+      for (const char **Suf = Suffixes; *Suf; Suf++) {
+        StringRef Suffix(*Suf);
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 0478d5568085a..2f6f50912fbcf 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -454,56 +454,57 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   for (const auto &CI : *SampleCounters) {
     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
       for (auto StackAddr : CtxKey->Context) {
-        uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
-        TotalStkAddr += inc;
+        uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+        TotalStkAddr += Inc;
         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
           ProfiledFunctions.insert(FRange->Func);
         else
-          ErrStkAddr += inc;
+          ErrStkAddr += Inc;
       }
     }
 
     for (auto Item : CI.second.RangeCounter) {
       uint64_t StartAddress = Item.first.first;
-      uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0;
-      TotalFuncRange += inc;
+      uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0;
+      TotalFuncRange += Inc;
       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
         ProfiledFunctions.insert(FRange->Func);
       else
-        ErrFuncRange += inc;
+        ErrFuncRange += Inc;
     }
 
     for (auto Item : CI.second.BranchCounter) {
       uint64_t SourceAddress = Item.first.first;
       uint64_t TargetAddress = Item.first.second;
-      uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0;
-      uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0;
-      TotalSrc += srcinc;
+      uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0;
+      uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0;
+      TotalSrc += SrcInc;
       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
         ProfiledFunctions.insert(FRange->Func);
       else
-        ErrSrc += srcinc;
-      TotalTgt += tgtinc;
+        ErrSrc += SrcInc;
+      TotalTgt += TgtInc;
       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
         ProfiledFunctions.insert(FRange->Func);
       else
-        ErrTgt += tgtinc;
+        ErrTgt += TgtInc;
     }
   }
 
   if (ErrStkAddr)
-    WithColor::warning() << "Cannot find Stack Address from DWARF Info: "
-                         << ErrStkAddr << "/" << TotalStkAddr << " missing\n";
+    emitWarningSummary(
+        ErrStkAddr, TotalStkAddr,
+        "of stack address samples do not belong to any function");
   if (ErrFuncRange)
-    WithColor::warning() << "Cannot find Function Range from DWARF Info: "
-                         << ErrFuncRange << "/" << TotalFuncRange
-                         << " missing\n";
+    emitWarningSummary(
+        ErrFuncRange, TotalFuncRange,
+        "of function range samples do not belong to any function");
   if (ErrSrc)
-    WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: "
-                         << ErrSrc << "/" << TotalSrc << " missing\n";
+    emitWarningSummary(ErrSrc, TotalSrc,
+                       "of LBR source samples do not belong to any function");
   if (ErrTgt)
-    WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: "
-                         << ErrTgt << "/" << TotalTgt << " missing\n";
+    emitWarningSummary(ErrTgt, TotalTgt,
+                       "of LBR target samples do not belong to any function");
   return true;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index c9561aa9cfb3c..1dab93fc871d2 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -834,8 +834,8 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     if (Size == 0 || Type != SymbolRef::ST_Function)
       continue;
 
-    SmallVector<StringRef> Suffixes(
-        {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+    const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
+                              ".cold",    ".warm",   nullptr};
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
 

>From 5eead6b006e3a223b52b29078d449d3cf1a137d8 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 22 Oct 2025 16:29:30 -0700
Subject: [PATCH 06/24] Clean up getSymbolSize API and warnings

---
 llvm/include/llvm/Object/ELFObjectFile.h   |  6 ++++++
 llvm/include/llvm/Object/ObjectFile.h      |  3 ++-
 llvm/tools/llvm-profgen/PerfReader.cpp     | 11 +----------
 llvm/tools/llvm-profgen/ProfiledBinary.cpp |  4 ++--
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ced1afdd4cc6a..cb7e6ef3458a9 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -310,6 +310,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
   uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
   uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+  uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override;
   Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
   uint8_t getSymbolBinding(DataRefImpl Symb) const override;
   uint8_t getSymbolOther(DataRefImpl Symb) const override;
@@ -703,6 +704,11 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
   return getSymbolSize(Symb);
 }
 
+template <class ELFT>
+uint64_t ELFObjectFile<ELFT>::getSymbolSizeImpl(DataRefImpl Symb) const {
+  return getSymbolSize(Symb);
+}
+
 template <class ELFT>
 uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
   Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 6ceedd2d310f7..bea61cf7c2214 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -256,6 +256,7 @@ class LLVM_ABI ObjectFile : public SymbolicFile {
   virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
   virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
   virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
+  virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; }
   virtual Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const = 0;
   virtual Expected<section_iterator>
   getSymbolSection(DataRefImpl Symb) const = 0;
@@ -484,7 +485,7 @@ inline uint64_t SymbolRef::getCommonSize() const {
 }
 
 inline uint64_t SymbolRef::getSize() const {
-  return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl());
+  return getObject()->getSymbolSizeImpl(getRawDataRefImpl());
 }
 
 inline Expected<section_iterator> SymbolRef::getSection() const {
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 183b248a72320..e9f7b666c95c7 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1274,8 +1274,6 @@ void PerfScriptReader::warnInvalidRange() {
 
   const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
                                   "likely due to profile and binary mismatch.";
-  const char *DanglingRangeMsg = "Range does not belong to any functions, "
-                                 "likely from PLT, .init or .fini section.";
   const char *RangeCrossFuncMsg =
       "Fall through range should not cross function boundaries, likely due to "
       "profile and binary mismatch.";
@@ -1283,7 +1281,6 @@ void PerfScriptReader::warnInvalidRange() {
 
   uint64_t TotalRangeNum = 0;
   uint64_t InstNotBoundary = 0;
-  uint64_t UnmatchedRange = 0;
   uint64_t RangeCrossFunc = 0;
   uint64_t BogusRange = 0;
 
@@ -1303,11 +1300,8 @@ void PerfScriptReader::warnInvalidRange() {
     }
 
     auto *FRange = Binary->findFuncRange(StartAddress);
-    if (!FRange) {
-      UnmatchedRange += I.second;
-      WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
+    if (!FRange)
       continue;
-    }
 
     if (EndAddress >= FRange->EndAddress) {
       RangeCrossFunc += I.second;
@@ -1325,9 +1319,6 @@ void PerfScriptReader::warnInvalidRange() {
   emitWarningSummary(
       InstNotBoundary, TotalRangeNum,
       "of samples are from ranges that are not on instruction boundary.");
-  emitWarningSummary(
-      UnmatchedRange, TotalRangeNum,
-      "of samples are from ranges that do not belong to any functions.");
   emitWarningSummary(
       RangeCrossFunc, TotalRangeNum,
       "of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 1dab93fc871d2..469d31d95c00a 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -824,6 +824,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
 
 void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
   // Load binary functions from symbol table when Debug info is incomplete
+  const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
+                            ".cold",    ".warm",   nullptr};
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -834,8 +836,6 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     if (Size == 0 || Type != SymbolRef::ST_Function)
       continue;
 
-    const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
-                              ".cold",    ".warm",   nullptr};
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
 

>From 8f59bfa035070c3cf638f696730156d26a5165fe Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 22 Oct 2025 20:12:53 -0700
Subject: [PATCH 07/24] Add cmdline option --load-function-from-symbol

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 469d31d95c00a..4a88c2becf133 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -65,6 +65,13 @@ static cl::list<std::string> DisassembleFunctions(
              "names only. Only work with show-disassembly-only"),
     cl::cat(ProfGenCategory));
 
+static cl::opt<bool>
+    LoadFunctionFromSymbol("load-function-from-symbol",
+                           cl::desc("Gather additional binary function info "
+                                    "from symbols (e.g. .symtab) in case "
+                                    "dwarf info is incomplete."),
+                           cl::cat(ProfGenCategory));
+
 static cl::opt<bool>
     KernelBinary("kernel",
                  cl::desc("Generate the profile for Linux kernel binary."),
@@ -257,7 +264,8 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
-  populateSymbolsFromBinary(Obj);
+  if (LoadFunctionFromSymbol || UsePseudoProbes)
+    populateSymbolsFromBinary(Obj);
 
   // Disassemble the text sections.
   disassemble(Obj);

>From a967994990b9bdd9dcd35c9739d1a8442c694d0a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 23 Oct 2025 10:25:29 -0700
Subject: [PATCH 08/24] Get symbol size only for ELFObjectFile

---
 llvm/include/llvm/Object/ELFObjectFile.h   | 6 ------
 llvm/include/llvm/Object/ObjectFile.h      | 6 ------
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 +++++-
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index cb7e6ef3458a9..ced1afdd4cc6a 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -310,7 +310,6 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
   uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
   uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
-  uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override;
   Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
   uint8_t getSymbolBinding(DataRefImpl Symb) const override;
   uint8_t getSymbolOther(DataRefImpl Symb) const override;
@@ -704,11 +703,6 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
   return getSymbolSize(Symb);
 }
 
-template <class ELFT>
-uint64_t ELFObjectFile<ELFT>::getSymbolSizeImpl(DataRefImpl Symb) const {
-  return getSymbolSize(Symb);
-}
-
 template <class ELFT>
 uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
   Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb);
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index bea61cf7c2214..289cc770e3466 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -198,7 +198,6 @@ class SymbolRef : public BasicSymbolRef {
   /// Get the alignment of this symbol as the actual value (not log 2).
   uint32_t getAlignment() const;
   uint64_t getCommonSize() const;
-  uint64_t getSize() const;
   Expected<SymbolRef::Type> getType() const;
 
   /// Get section this symbol is defined in reference to. Result is
@@ -256,7 +255,6 @@ class LLVM_ABI ObjectFile : public SymbolicFile {
   virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
   virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const;
   virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0;
-  virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; }
   virtual Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const = 0;
   virtual Expected<section_iterator>
   getSymbolSection(DataRefImpl Symb) const = 0;
@@ -484,10 +482,6 @@ inline uint64_t SymbolRef::getCommonSize() const {
   return getObject()->getCommonSymbolSize(getRawDataRefImpl());
 }
 
-inline uint64_t SymbolRef::getSize() const {
-  return getObject()->getSymbolSizeImpl(getRawDataRefImpl());
-}
-
 inline Expected<section_iterator> SymbolRef::getSection() const {
   return getObject()->getSymbolSection(getRawDataRefImpl());
 }
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 4a88c2becf133..93f605a891afc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -839,7 +839,11 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
     const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
-    const uint64_t Size = Symbol.getSize();
+    uint64_t Size = 0;
+    if (isa<ELFObjectFileBase>(Symbol.getObject())) {
+      ELFSymbolRef ElfSymbol(Symbol);
+      Size = ElfSymbol.getSize();
+    }
 
     if (Size == 0 || Type != SymbolRef::ST_Function)
       continue;

>From 0dc2c669f3639fb91545288ba76647deec172978 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 23 Oct 2025 14:49:54 -0700
Subject: [PATCH 09/24] Add unit test

---
 .../llvm-profgen/Inputs/missing-dwarf.exe     | Bin 0 -> 18744 bytes
 .../tools/llvm-profgen/missing-dwarf.test     |  40 ++++++++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    |   8 ++--
 3 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100755 llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe
 create mode 100644 llvm/test/tools/llvm-profgen/missing-dwarf.test

diff --git a/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe
new file mode 100755
index 0000000000000000000000000000000000000000..c4b8af0bf1f2ab87e195231a326fc756a7b46f23
GIT binary patch
literal 18744
zcmeHPYiu0V6+W|LI}Rc7CJ+d9h^OUIN2+IcZO5?<NyhPOU7X;?CIpEvncW%N3+vtO
z?rgA8MJ)n|&?31=rASDv`lAx6_|XUI;|HoCh_rtYP*t=dQbXIQNR&3Hr46*o_MAKS
ztaoR3L)1!brOuUS&pqEg-??|row;-8as9>7$+3{40H+Y_1=6&zAt3J;pnsoGCDsoc
z!GKQqG_)YngqtBsNP0PzzYKW=EL)|WijsKSC?lj%CASL=fF-w(B=OdgA+w%rZT3`_
z5cYK9i6W_k+&`8RvI3TsJW-$K`2o%FGE%-QVV0FVijARr2F6}PWLTb&^_`WHc@!?<
zO-bIA<gr|l_AEJ`)JEF+Wjy;;12xh;sgZ(76Tbj{-Vw<=BJEkSJxi*C()2mk{#WXo
zlDx1c+(1~yk>zJR7+7+BpF<wS<#H2u^C4MZqkdRbJdZ4;3Xyaw(bpSE+g<5YrnuO(
z*x%RH*Q*z@dbc1@yQmI2rp6E60ik7BB}|4<^aYiZKPsn0Wz@fv2<J@zm-$1SEp===
z5!346BMtEB1~`}|?s?KIxK`dZ7pznUNT(g|gYn6U{ljLr-qQ=_#LNNHcJj_#s^B{L
znFGV=Y{r?f5@`or%q?Uy(uL`n)-}X7ti?ltt5D~Dd;Tj=VRwh%dc-K5-De0%#{k8E
z(>oP7`=YY*6*%_^;TI}!KDG!CQV@Y40zm|V2m}!bA`nC%h(Hj5|Az?txoyWs@e_Y&
zjen{6?Fc~pkySTTdM$q9<<?VT_o+Pg-o(k*?}?xMP5i{~&QHyZ##UoLi=RHNZ8o5^
z^%-RSWma$Nc(`nTYE4~<Ke6Y><oAi@?~xd|;BLjLGP0`mrL%1v%S1dSCEA}9_PZV;
zvGZd5<oWoEf7};;@q!vxUWvbc(Y+cS?2!&yOJ`@>I`Afk`PAO!JwsRw#jEa!pFH=@
z1Eo^wmF5T{%F$CRr)Jk0*VbVG*}wbv8b*{K(GA8?^oV{*$M}R#kN$(2`OX>i7F)&m
zgpZyo$F3YLG6_-;fgl1w1cC?z5eOm>L?DPj5P={9K?H&b1QGb(i-1yo1Lw6(X;*Kd
zcdztrEPTFHila18-c7~tmr6OpQLd1V>1t1a^1vZb7Q;&WmFrsR-88+&+m74npGu{h
z$!26-n6M~rXAqOG>4gp6X2WAx#|D%zv?=^(Xn14G7&=0geAu`xVmal@<#&4J1O+LG
zKoEf-0zm|V2m}!bA`nC%h(Hj5AOb-I{sR%fm%x->liySGx0&=UC#5Srfj5MP$ZeGR
zt&(@0)cL#UcB%9C$)AvVe6JyL;vNulT>inuQkDu9RfB~0jW^10lR>KZmYU?#viu5C
zFfDij#62hGw@H1glq}ZC%(K!T*LzIj>!oD=#gf;a$D}>JE*9<MxZrK9&>xiLlKx*4
z{9Spy28<664{DwG_3}<F7Sng>QLQ`L-5cE<8_+rrIkpzJTv^Z+i|vGD+RDsn$DMp3
zmCb0~-Fj4y&ZS(fbKZ4xg~3P!mHA>qPi7Y)>GW~$rz@Aw-s>b?oh+?^zTE=@y#oW$
z#6ZHf2kfNXvnx5UyJx_TS^d4e-O0XzUOQ^-grUQOy}4NTKpz}BJeV(LGAXQjE}8T<
z6!sq;v@)4&B9*CIJbve3bg{?oj_&HSG+mDrmI}6$D@5>f?}(L1b!D=yqbFf{YOn{U
zhX-RYJT};W3B$3;!LHaP%EyP$$J7LQ8y$>7Y8rp4+K|emT&z%3BlDh5sZ<%`N7`;G
z*_C(F*(8&Em`C=3&%Lnit;v@z+>Hlki8@}L&1OIP#S_2pv%mV5hbhoR-+nU~0%f@^
z^bjynLA^<wgY-;?KFA&)x>4z|V&!!sWAc2b^Tls-hb&`R&xXf~YcAo3$B+>@melie
zO!8YUVZ(k{*7L(<EbG~DTiY+uW^U_*Y|o7tpJr$xcko91RrBpP-g`rg6#`MjIta=t
zpM~(X`D@#6dVc?P$8Xx)zNMvbEy6U6HDgEYCKE;Z`=*T~$N_Gc=@7nDr6bhVyh9Nm
z$p7>n{rq!}+<W40<J;fd-ScuBpY*K{g=KS;X7!5OS~eVMhL-!ARcLAY+Vb+Rlq=5N
z)v`ekwV+2G7E*-*o^4(B{Z@Wfx9`u2F`gR1PIPdc0v*LP`16B4s{50;c^~O_VvdIP
zC?0vRlQLqd0cL=9Yt5(^wUt!HrIL?JD(>_))u=I3(O-XET!kZb@uu?iq|%v+JHywJ
zYP_X<y{X36LA`Na#aQw6q1t{u@O7pd$E%h+rmFD`<@2o?zY_R3t;X@{s~MG$$g8la
zybh<bP=z)y>e`2auh-S~n}M&>)%X^ucb!pj#RsqJsJ6$a%7#%1iM$G1VOw4N8rWVJ
zr(>x$spYGEb(so08}Z{j3-ROapdB-+LDbaps<rAOwY-|ekMF1kq^3eg16*r>(+O6a
zRQOaah=>X|G{9+8s!j^3P51(!8U<V}NJGuqd8A2$ILdnV(};&~*2UwpGzVHCMfi&E
zIG>j|&o9~jLCh$y*6-9lN{{2FK<#n(UBt;h$bpV*DZPN3f>$X2@q7mHdhKttU%wT8
z+F(^PFjt-G`FS5r6sSEfJ`&X0^Km2Yph9n3e8>3>h;ON`PG|<=S{?j8f!Cf#cOxFg
z^UHr8B@y2esvS>K(%yeQ9vA*=D}Ds=&7qCZ>Wj~lgyU*b9+&WHB+@q#--7+c*A$+o
ze+%(2_Pgeb!?(#kv;n5%@xzy$tn{Nv{ATI@CF#e%zpt{rJWlw2<L8KP4sETe_t%JP
z(r2akko<(|jN6f7Ay117BRE^7X{OoNixW6|-g3Q_hLKn-f)mm5pSKP%nRjD)7FX8c
zq||h+IY6M`7V%@&B&@B%Gu;I<N$c<mXlZB7xpX#RrA^z-<_o4(Tm+nY=hBYr*m|_S
zg*l7!Ow-EettHdRxcMcR&07nOX%`n3mXK1RnKUD<GL=_-(;PcAbYRpRJvd_G>c5e@
z4h|id7)CPQNtmN?SsEWX1m^hU?fZu&&D+PurblPYnW6oYqsZ{DKr}9Q-JxS!t_5Nr
zX!lU@79pZiy&}=H3t4mC%Gk78apHEgvQrteSa58h2?_4H4W^wo#mw4r9dd&+Z8Mjm
z)s2ZlLHZSw7SqJlkG!|EEVAxeM-eUny|A?4S_zbH-jnm3!&#1#&w(!PIp#7&9dC_t
zPTpOT(9r&gF6w%n%I2-YJm~gP2K{-`&3h&C2CPaku?+XcA_C~taiF`-BK}f;V_8;2
zPIu;I56|1kV9jHAUF|VAkM696R1)jTqA&c>b7{<Y&M-_sH+u_sAb~FL!o=Poo+~&-
zJVJ376L@qv1@ut~d96<rQ)yebTsNOe6!GXn&rYH^hoPL!a^6ZL^3HMAaIvn#d5kM(
zEt~eoBa;bsUs=bjnVp?2IBwZL2H4CM6S!|{bxO=#$|3geCwk6-Lj+!vyu|ZXUJuXX
zjw}|ooF_?FZhAJse4gL(`fm-(DpPBvg84q&D)~GQ=5^qj&z{F+hDR{-SA3Si^)IT1
zC|XoK3vNG;!~XgYq8W|p%zs+)pO*YZ&y9F~Lvdn0KW90N7@0DkpZA1iL;3lRbW;<*
zd$53>%`l(m=?_XiKPTe$vmML(kWbHgSm*itW0J4Qid#@twqL^V0A|RP`8@A`R`QQY
zL$2Sy{|{q1`6d4%@w`dgYy<FfG%l8IS(WpoH9lV-HOb#j&MT6H%g74mD(mO_jaxT+
zCU`}!O8)wv!cyY%{`2)8G>_)5kN4jz1^7B<D*62X1Ni?6@c$2Bdw>1U`S^xr2vb87
z0w}(f$zT5uuy7M@@^CHBANl_b`0ZI|`4V#5D)@Zg6qo<^f$h*lB-UBJiYWCx*U$G?
z{C^|(|3mQc!90%Zo5-d3F`w_l&PEW#q>10PALg^9`Eez`JTHxU96nFk1M^s(^YM8e
zxlQu@{Y7<Ergu>xU)+A4$8En%{(0n*Pic-hFERfa2BhTrS$>FkJ^l}+!@X?SfZvMc
zJ`-X2LCho2Gm*;Vw{4gF<w_&W@96e`>m|<fW<P%?0{m>7O!+?cR>>dn5&ZqJPu4H1
zDU0PEFWXs0yo}Es+}42qtmOOe-2ME!8t{K2`Jexo`W?yl_kWZgIAOv&t*XTJv&=Q9
ze?;=bRqWcn{z8NLXC$BBE3g}`m*tlm)PM9c>;Far{(|J+#1%`)^)meZ2K*-_-+%q_
z^GPp{{p=LwcO-vR4qP`QMspP&H+Wp3`0%-p*>cL=;$=&|3VkZxbJpi)_jvr%K7vn$
Fe*(I&+5G?j

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
new file mode 100644
index 0000000000000..0f0b8bc30de47
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -0,0 +1,40 @@
+; RUN: rm -rf %t
+; RUN: mkdir -p %t
+; RUN: cd %t
+
+; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof
+; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe
+
+; Test --load-function-from-symbol=0
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
+
+; CHECK-NO-LOAD-SYMTAB:      warning: 100.00%(1/1) of function range samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
+
+; Test --load-function-from-symbol=1
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB
+
+; CHECK-LOAD-SYMTAB:      main:2:1
+; CHECK-LOAD-SYMTAB-NEXT:  1: 1
+; CHECK-LOAD-SYMTAB-NEXT:  2: 1 foo:1
+; CHECK-LOAD-SYMTAB-NEXT:  !CFGChecksum: 281479271677951
+; CHECK-LOAD-SYMTAB-NEXT: foo:0:0
+; CHECK-LOAD-SYMTAB-NEXT:  1: 0
+; CHECK-LOAD-SYMTAB-NEXT:  !CFGChecksum: 4294967295
+
+; Build instructions:
+; missing-dwarf.o:       clang -gsplit-dwarf=split -fdebug-compilation-dir=.  test.c   -fdebug-info-for-profiling  -fpseudo-probe-for-profiling  -O0 -g -o missing-dwarf.o -c
+; missing-dwarf.exe:     clang -fdebug-compilation-dir=.  missing-dwarf.o -o missing-dwarf.exe  -fdebug-info-for-profiling  -fpseudo-probe-for-profiling  -O0 -g
+
+; Source code:
+
+int foo() {
+  return 1;
+}
+
+int main() {
+  foo();
+  return 0;
+}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 93f605a891afc..f601fc133df1c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -66,7 +66,7 @@ static cl::list<std::string> DisassembleFunctions(
     cl::cat(ProfGenCategory));
 
 static cl::opt<bool>
-    LoadFunctionFromSymbol("load-function-from-symbol",
+    LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
                            cl::desc("Gather additional binary function info "
                                     "from symbols (e.g. .symtab) in case "
                                     "dwarf info is incomplete."),
@@ -264,7 +264,7 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
-  if (LoadFunctionFromSymbol || UsePseudoProbes)
+  if (LoadFunctionFromSymbol && UsePseudoProbes)
     populateSymbolsFromBinary(Obj);
 
   // Disassemble the text sections.
@@ -853,8 +853,10 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
 
     auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
     auto &Func = Ret.first->second;
-    if (Ret.second)
+    if (Ret.second) {
       Func.FuncName = Ret.first->first;
+      HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+    }
 
     if (auto Range = findFuncRange(Addr)) {
       if (Ret.second && ShowDetailedWarning)

>From 75dc996056424a43aa3b68be63f14cdf106ba340 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 24 Oct 2025 16:16:40 -0700
Subject: [PATCH 10/24] Nit

---
 llvm/include/llvm/ProfileData/SampleProf.h      | 9 +++++----
 llvm/test/tools/llvm-profgen/missing-dwarf.test | 5 ++---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp      | 4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 6de5884253017..dd0495f29e8c3 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,18 +1214,19 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr};
+    const SmallVector<StringRef> KnownSuffixes{LLVMSuffix, PartSuffix,
+                                               UniqSuffix};
     return getCanonicalFnName(FnName, KnownSuffixes, Attr);
   }
 
-  static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[],
+  static StringRef getCanonicalFnName(StringRef FnName,
+                                      ArrayRef<StringRef> Suffixes,
                                       StringRef Attr = "selected") {
     if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
     if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const char **Suf = Suffixes; *Suf; Suf++) {
-        StringRef Suffix(*Suf);
+      for (const auto Suffix : Suffixes) {
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
index 0f0b8bc30de47..0fc0d660133f1 100644
--- a/llvm/test/tools/llvm-profgen/missing-dwarf.test
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -3,17 +3,16 @@
 ; RUN: cd %t
 
 ; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof
-; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe
 
 ; Test --load-function-from-symbol=0
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
 
 ; CHECK-NO-LOAD-SYMTAB:      warning: 100.00%(1/1) of function range samples do not belong to any function
 ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
 ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
 
 ; Test --load-function-from-symbol=1
-; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB
 
 ; CHECK-LOAD-SYMTAB:      main:2:1
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index f601fc133df1c..c1c0ea5fe6b3e 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -832,8 +832,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
 
 void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
   // Load binary functions from symbol table when Debug info is incomplete
-  const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
-                            ".cold",    ".warm",   nullptr};
+  const SmallVector<StringRef> Suffixes(
+      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);

>From 5600e83a5f36ce7fe754f7a71a764d0cf35af15f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Wed, 29 Oct 2025 17:21:49 -0700
Subject: [PATCH 11/24] Cleanup loggings and comments

---
 .../tools/llvm-profgen/missing-dwarf.test     |  4 +-
 llvm/tools/llvm-profgen/PerfReader.cpp        | 18 +++++++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 33 ---------------
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    | 41 ++++++++++++-------
 llvm/tools/llvm-profgen/ProfiledBinary.h      |  1 +
 5 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test
index 0fc0d660133f1..b96ae9018dae1 100644
--- a/llvm/test/tools/llvm-profgen/missing-dwarf.test
+++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test
@@ -7,9 +7,7 @@
 ; Test --load-function-from-symbol=0
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB
 
-; CHECK-NO-LOAD-SYMTAB:      warning: 100.00%(1/1) of function range samples do not belong to any function
-; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function
-; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function
+; CHECK-NO-LOAD-SYMTAB:      warning: Loading of DWARF info completed, but no binary functions have been retrieved.
 
 ; Test --load-function-from-symbol=1
 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index e9f7b666c95c7..1c4b79554e1a5 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1274,6 +1274,8 @@ void PerfScriptReader::warnInvalidRange() {
 
   const char *EndNotBoundaryMsg = "Range is not on instruction boundary, "
                                   "likely due to profile and binary mismatch.";
+  const char *DanglingRangeMsg = "Range does not belong to any functions, "
+                                 "likely from PLT, .init or .fini section.";
   const char *RangeCrossFuncMsg =
       "Fall through range should not cross function boundaries, likely due to "
       "profile and binary mismatch.";
@@ -1281,6 +1283,8 @@ void PerfScriptReader::warnInvalidRange() {
 
   uint64_t TotalRangeNum = 0;
   uint64_t InstNotBoundary = 0;
+  uint64_t UnmatchedRange = 0;
+  uint64_t RecoveredRange = 0;
   uint64_t RangeCrossFunc = 0;
   uint64_t BogusRange = 0;
 
@@ -1300,8 +1304,14 @@ void PerfScriptReader::warnInvalidRange() {
     }
 
     auto *FRange = Binary->findFuncRange(StartAddress);
-    if (!FRange)
+    if (!FRange) {
+      UnmatchedRange += I.second;
+      WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg);
       continue;
+    }
+
+    if (FRange->Func->FromSymtab)
+      RecoveredRange += I.second;
 
     if (EndAddress >= FRange->EndAddress) {
       RangeCrossFunc += I.second;
@@ -1319,6 +1329,12 @@ void PerfScriptReader::warnInvalidRange() {
   emitWarningSummary(
       InstNotBoundary, TotalRangeNum,
       "of samples are from ranges that are not on instruction boundary.");
+  emitWarningSummary(
+      UnmatchedRange, TotalRangeNum,
+      "of samples are from ranges that do not belong to any functions.");
+  emitWarningSummary(
+      RecoveredRange, TotalRangeNum,
+      "of samples are from ranges that belong to functions recovered from symbol table.");
   emitWarningSummary(
       RangeCrossFunc, TotalRangeNum,
       "of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 2f6f50912fbcf..3b875c5de3c09 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,62 +449,29 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   // Go through all the stacks, ranges and branches in sample counters, use
   // the start of the range to look up the function it belongs and record the
   // function.
-  uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
-  uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
   for (const auto &CI : *SampleCounters) {
     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
       for (auto StackAddr : CtxKey->Context) {
-        uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
-        TotalStkAddr += Inc;
         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
           ProfiledFunctions.insert(FRange->Func);
-        else
-          ErrStkAddr += Inc;
       }
     }
 
     for (auto Item : CI.second.RangeCounter) {
       uint64_t StartAddress = Item.first.first;
-      uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0;
-      TotalFuncRange += Inc;
       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
         ProfiledFunctions.insert(FRange->Func);
-      else
-        ErrFuncRange += Inc;
     }
 
     for (auto Item : CI.second.BranchCounter) {
       uint64_t SourceAddress = Item.first.first;
       uint64_t TargetAddress = Item.first.second;
-      uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0;
-      uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0;
-      TotalSrc += SrcInc;
       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
         ProfiledFunctions.insert(FRange->Func);
-      else
-        ErrSrc += SrcInc;
-      TotalTgt += TgtInc;
       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
         ProfiledFunctions.insert(FRange->Func);
-      else
-        ErrTgt += TgtInc;
     }
   }
-
-  if (ErrStkAddr)
-    emitWarningSummary(
-        ErrStkAddr, TotalStkAddr,
-        "of stack address samples do not belong to any function");
-  if (ErrFuncRange)
-    emitWarningSummary(
-        ErrFuncRange, TotalFuncRange,
-        "of function range samples do not belong to any function");
-  if (ErrSrc)
-    emitWarningSummary(ErrSrc, TotalSrc,
-                       "of LBR source samples do not belong to any function");
-  if (ErrTgt)
-    emitWarningSummary(ErrTgt, TotalTgt,
-                       "of LBR target samples do not belong to any function");
   return true;
 }
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index c1c0ea5fe6b3e..e65bafa9ac4ca 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -831,13 +831,21 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
 }
 
 void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
-  // Load binary functions from symbol table when Debug info is incomplete
-  const SmallVector<StringRef> Suffixes(
-      {".destroy", ".resume", ".llvm.", ".cold", ".warm"});
+  // Load binary functions from symbol table when Debug info is incomplete.
+  // Strip the internal suffixes which are not reflected in the DWARF info.
+  const SmallVector<StringRef, 6> Suffixes(
+      {
+        // Internal suffixes from CoroSplit pass
+        ".cleanup", ".destroy", ".resume",
+        // Internal suffixes from Bolt
+        ".cold", ".warm",
+        // Compiler internal
+        ".llvm.",
+      });
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
-    const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+    const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName);
     const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
     uint64_t Size = 0;
     if (isa<ELFObjectFileBase>(Symbol.getObject())) {
@@ -855,25 +863,26 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     auto &Func = Ret.first->second;
     if (Ret.second) {
       Func.FuncName = Ret.first->first;
+      Func.FromSymtab = true;
       HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
     }
 
-    if (auto Range = findFuncRange(Addr)) {
-      if (Ret.second && ShowDetailedWarning)
+    if (auto Range = findFuncRange(StartAddr)) {
+      if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
         WithColor::warning()
-            << "Symbol " << Name << " start address "
-            << format("%8" PRIx64, Addr) << " already exists in DWARF at "
-            << format("%8" PRIx64, Range->StartAddress) << " in function "
-            << Range->getFuncName() << "\n";
+            << "Conflicting symbol " << Name << " already exists in DWARF as "
+            << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr)
+            << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to "
+            << format("%8" PRIx64, Range->EndAddress) << "\n";
     } else {
       // Store/Update Function Range from SymTab
-      Func.Ranges.emplace_back(Addr, Addr + Size);
+      Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
 
-      auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+      auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
       FuncRange &FRange = R.first->second;
       FRange.Func = &Func;
-      FRange.StartAddress = Addr;
-      FRange.EndAddress = Addr + Size;
+      FRange.StartAddress = StartAddr;
+      FRange.EndAddress = StartAddr + Size;
     }
   }
 }
@@ -902,8 +911,10 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
     // BinaryFunction indexed by the name.
     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
     auto &Func = Ret.first->second;
-    if (Ret.second)
+    if (Ret.second) {
       Func.FuncName = Ret.first->first;
+      Func.FromSymtab = false;
+    }
 
     for (const auto &Range : Ranges) {
       uint64_t StartAddress = Range.LowPC;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index e73ffd3143e3d..753fcc935716b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,6 +76,7 @@ struct BinaryFunction {
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
+  bool FromSymtab;
 
   uint64_t getFuncSize() {
     uint64_t Sum = 0;

>From 0df504e8a998902821fe40db4b4edcc6ccb5259e Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 30 Oct 2025 22:47:56 -0700
Subject: [PATCH 12/24] Mark function FromSymtab too if new range is found

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e65bafa9ac4ca..b1e32757c31a9 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -840,7 +840,7 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
         // Internal suffixes from Bolt
         ".cold", ".warm",
         // Compiler internal
-        ".llvm.",
+        ".llvm."
       });
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
@@ -877,6 +877,7 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     } else {
       // Store/Update Function Range from SymTab
       Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
+      Func.FromSymtab = true;
 
       auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
       FuncRange &FRange = R.first->second;

>From 644fce908183f8069582bd127d7499e4a8e031fa Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 30 Oct 2025 23:05:13 -0700
Subject: [PATCH 13/24] Formatting

---
 llvm/tools/llvm-profgen/PerfReader.cpp     |  6 +++---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 1c4b79554e1a5..a8a9c6eda85fd 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1332,9 +1332,9 @@ void PerfScriptReader::warnInvalidRange() {
   emitWarningSummary(
       UnmatchedRange, TotalRangeNum,
       "of samples are from ranges that do not belong to any functions.");
-  emitWarningSummary(
-      RecoveredRange, TotalRangeNum,
-      "of samples are from ranges that belong to functions recovered from symbol table.");
+  emitWarningSummary(RecoveredRange, TotalRangeNum,
+                     "of samples are from ranges that belong to functions "
+                     "recovered from symbol table.");
   emitWarningSummary(
       RangeCrossFunc, TotalRangeNum,
       "of samples are from ranges that do cross function boundaries.");
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index b1e32757c31a9..9ae10c1faaa0c 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -834,14 +834,12 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
   // Load binary functions from symbol table when Debug info is incomplete.
   // Strip the internal suffixes which are not reflected in the DWARF info.
   const SmallVector<StringRef, 6> Suffixes(
-      {
-        // Internal suffixes from CoroSplit pass
-        ".cleanup", ".destroy", ".resume",
-        // Internal suffixes from Bolt
-        ".cold", ".warm",
-        // Compiler internal
-        ".llvm."
-      });
+      {// Internal suffixes from CoroSplit pass
+       ".cleanup", ".destroy", ".resume",
+       // Internal suffixes from Bolt
+       ".cold", ".warm",
+       // Compiler internal
+       ".llvm."});
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -871,8 +869,10 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
       if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
         WithColor::warning()
             << "Conflicting symbol " << Name << " already exists in DWARF as "
-            << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr)
-            << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to "
+            << Range->getFuncName() << " at address "
+            << format("%8" PRIx64, StartAddr)
+            << ". The DWARF indicates a range from "
+            << format("%8" PRIx64, Range->StartAddress) << " to "
             << format("%8" PRIx64, Range->EndAddress) << "\n";
     } else {
       // Store/Update Function Range from SymTab

>From 9188eff74c1fcfe4a21ff41ef92d8df4a8b4b1f2 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 31 Oct 2025 19:26:25 -0700
Subject: [PATCH 14/24] Fix suffix strip && refactor the function checks

---
 llvm/include/llvm/ProfileData/SampleProf.h |  2 +-
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 53 +++++++++++-----------
 llvm/tools/llvm-profgen/ProfiledBinary.h   |  4 +-
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index dd0495f29e8c3..2bf3312446443 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1235,7 +1235,7 @@ class FunctionSamples {
         if (It == StringRef::npos)
           continue;
         auto Dit = Cand.rfind('.');
-        if (Dit == It + Suffix.size() - 1)
+        if (Dit == It || Dit == It + Suffix.size() - 1)
           Cand = Cand.substr(0, It);
       }
       return Cand;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 9ae10c1faaa0c..215a9463aad33 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -265,7 +265,7 @@ void ProfiledBinary::load() {
     decodePseudoProbe(Obj);
 
   if (LoadFunctionFromSymbol && UsePseudoProbes)
-    populateSymbolsFromBinary(Obj);
+    loadSymbolsFromSymtab(Obj);
 
   // Disassemble the text sections.
   disassemble(Obj);
@@ -830,16 +830,16 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
-void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
   // Load binary functions from symbol table when Debug info is incomplete.
   // Strip the internal suffixes which are not reflected in the DWARF info.
-  const SmallVector<StringRef, 6> Suffixes(
+  const SmallVector<StringRef, 10> Suffixes(
       {// Internal suffixes from CoroSplit pass
        ".cleanup", ".destroy", ".resume",
        // Internal suffixes from Bolt
        ".cold", ".warm",
-       // Compiler internal
-       ".llvm."});
+       // Compiler/LTO internal
+       ".llvm.", ".part.", ".isra.", ".constprop.", ".lto_priv."});
   StringRef FileName = Obj->getFileName();
   for (const SymbolRef &Symbol : Obj->symbols()) {
     const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
@@ -857,33 +857,34 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
 
-    auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
-    auto &Func = Ret.first->second;
-    if (Ret.second) {
-      Func.FuncName = Ret.first->first;
-      Func.FromSymtab = true;
-      HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
-    }
+    auto Range = findFuncRange(StartAddr);
+    if (!Range || Range->StartAddress != StartAddr) {
+      // Function from symbol table not found previously in DWARF, store ranges.
+      auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+      auto &Func = Ret.first->second;
+      if (Ret.second) {
+        Func.FuncName = Ret.first->first;
+        HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+      }
 
-    if (auto Range = findFuncRange(StartAddr)) {
-      if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning)
-        WithColor::warning()
-            << "Conflicting symbol " << Name << " already exists in DWARF as "
-            << Range->getFuncName() << " at address "
-            << format("%8" PRIx64, StartAddr)
-            << ". The DWARF indicates a range from "
-            << format("%8" PRIx64, Range->StartAddress) << " to "
-            << format("%8" PRIx64, Range->EndAddress) << "\n";
-    } else {
-      // Store/Update Function Range from SymTab
-      Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
       Func.FromSymtab = true;
+      Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
 
       auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
       FuncRange &FRange = R.first->second;
+
       FRange.Func = &Func;
       FRange.StartAddress = StartAddr;
       FRange.EndAddress = StartAddr + Size;
+
+    } else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
+      // Function already found from DWARF, check consistency between symbol
+      // table and DWARF.
+      WithColor::warning() << "Conflicting name for symbol" << Name
+                           << " at address " << format("%8" PRIx64, StartAddr)
+                           << ", but the DWARF symbol " << Range->getFuncName()
+                           << " indicates a starting address at "
+                           << format("%8" PRIx64, Range->StartAddress) << "\n";
     }
   }
 }
@@ -912,10 +913,8 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
     // BinaryFunction indexed by the name.
     auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
     auto &Func = Ret.first->second;
-    if (Ret.second) {
+    if (Ret.second)
       Func.FuncName = Ret.first->first;
-      Func.FromSymtab = false;
-    }
 
     for (const auto &Range : Ranges) {
       uint64_t StartAddress = Range.LowPC;
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 753fcc935716b..50b2caaa0a5e3 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,7 +76,7 @@ struct BinaryFunction {
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
-  bool FromSymtab;
+  bool FromSymtab = false;
 
   uint64_t getFuncSize() {
     uint64_t Sum = 0;
@@ -358,7 +358,7 @@ class ProfiledBinary {
   void populateSymbolAddressList(const object::ObjectFile *O);
 
   // Load functions from its symbol table (when DWARF info is missing).
-  void populateSymbolsFromBinary(const object::ObjectFile *O);
+  void loadSymbolsFromSymtab(const object::ObjectFile *O);
 
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start a function range is the real entry of the

>From b6ae0ba636db8ec09681bc05bd19d4cd83ed4acd Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Tue, 11 Nov 2025 23:55:17 -0800
Subject: [PATCH 15/24] Fixup corrupted DWARF function names using symbol table
 info

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 65 ++++++++++++++++++----
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 215a9463aad33..5b241ba5d9cfc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -854,11 +854,12 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
     if (Size == 0 || Type != SymbolRef::ST_Function)
       continue;
 
+    const uint64_t EndAddr = StartAddr + Size;
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
 
     auto Range = findFuncRange(StartAddr);
-    if (!Range || Range->StartAddress != StartAddr) {
+    if (!Range) {
       // Function from symbol table not found previously in DWARF, store ranges.
       auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
       auto &Func = Ret.first->second;
@@ -868,23 +869,67 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
       }
 
       Func.FromSymtab = true;
-      Func.Ranges.emplace_back(StartAddr, StartAddr + Size);
+      Func.Ranges.emplace_back(StartAddr, EndAddr);
 
       auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
       FuncRange &FRange = R.first->second;
 
       FRange.Func = &Func;
       FRange.StartAddress = StartAddr;
-      FRange.EndAddress = StartAddr + Size;
+      FRange.EndAddress = EndAddr;
 
-    } else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
-      // Function already found from DWARF, check consistency between symbol
-      // table and DWARF.
-      WithColor::warning() << "Conflicting name for symbol" << Name
-                           << " at address " << format("%8" PRIx64, StartAddr)
+    } else if (SymName != Range->getFuncName()) {
+      // Function range already found from DWARF, but the symbol name from
+      // symbol table is inconsistent with debug info.
+      if (ShowDetailedWarning)
+        WithColor::warning()
+            << "Conflicting name for symbol " << Name << " with range ("
+            << format("%8" PRIx64, StartAddr) << ", "
+            << format("%8" PRIx64, EndAddr) << ")"
+            << ", but the DWARF symbol " << Range->getFuncName()
+            << " indicates an overlapping range ("
+            << format("%8" PRIx64, Range->StartAddress) << ", "
+            << format("%8" PRIx64, Range->EndAddress) << ")\n";
+
+      assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
+             "Mismatched function range");
+
+      auto ErrSym = BinaryFunctions.find(Range->getFuncName().str());
+      auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+      auto &Func = Ret.first->second;
+
+      // Symbol table may contain multiple symbol names of the same starting
+      // address. Only need to pick one from these.
+      if (!Ret.second)
+        continue;
+
+      Func.FuncName = Ret.first->first;
+      Func.Ranges = ErrSym->second.Ranges;
+      Func.FromSymtab = true;
+
+      HashBinaryFunctions.erase(MD5Hash(Range->getFuncName()));
+      BinaryFunctions.erase(ErrSym);
+
+      HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
+      Range->Func = &Func;
+      for (auto [RangeStart, _] : Func.Ranges) {
+        if (auto FRange = findFuncRangeForStartAddr(RangeStart)) {
+          assert(FRange && "Cannot find function range");
+          FRange->Func = &Func;
+        }
+      }
+    } else if (StartAddr != Range->StartAddress &&
+               EndAddr != Range->EndAddress) {
+      // Function already found in DWARF, but the address range from symbol
+      // table conflicts/overlaps with the debug info.
+      WithColor::warning() << "Conflicting range for symbol " << Name
+                           << " with range (" << format("%8" PRIx64, StartAddr)
+                           << ", " << format("%8" PRIx64, EndAddr) << ")"
                            << ", but the DWARF symbol " << Range->getFuncName()
-                           << " indicates a starting address at "
-                           << format("%8" PRIx64, Range->StartAddress) << "\n";
+                           << " indicates another range ("
+                           << format("%8" PRIx64, Range->StartAddress) << ", "
+                           << format("%8" PRIx64, Range->EndAddress) << ")\n";
+      llvm_unreachable("invalid function range");
     }
   }
 }

>From b1cbdd08bd67370505f38f2bb666510b2cb014b2 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 14 Nov 2025 14:45:10 -0800
Subject: [PATCH 16/24] Fixup overwritten DWARF symbol name when decoding
 pseudo probe

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 13 ++++++++++++-
 llvm/tools/llvm-profgen/ProfiledBinary.h   |  4 ++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 5b241ba5d9cfc..52cb477848b68 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -828,6 +828,14 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
     SymbolStartAddrs[GUID] = Addr;
     StartAddrToSymMap.emplace(Addr, GUID);
   }
+
+  // Load DWARF name too if they are overwritten by the symbol table
+  for (auto [OldGUID, Func] : OverriddenBinaryFunctions) {
+    uint64_t GUID = Function::getGUIDAssumingExternalLinkage(Func->FuncName);
+    uint64_t Addr = SymbolStartAddrs[GUID];
+    SymbolStartAddrs[OldGUID] = Addr;
+    StartAddrToSymMap.emplace(Addr, OldGUID);
+  }
 }
 
 void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
@@ -903,13 +911,16 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
       if (!Ret.second)
         continue;
 
+      uint64_t OldGUID = MD5Hash(Range->getFuncName());
+
       Func.FuncName = Ret.first->first;
       Func.Ranges = ErrSym->second.Ranges;
       Func.FromSymtab = true;
 
-      HashBinaryFunctions.erase(MD5Hash(Range->getFuncName()));
+      HashBinaryFunctions.erase(OldGUID);
       BinaryFunctions.erase(ErrSym);
 
+      OverriddenBinaryFunctions[OldGUID] = &Func;
       HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
       Range->Func = &Func;
       for (auto [RangeStart, _] : Func.Ranges) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 50b2caaa0a5e3..8858300678343 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -232,6 +232,10 @@ class ProfiledBinary {
   // GUID to symbol start address map
   DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
 
+  // GUID mapping of the overridden DWARF symbol names to the binary functions
+  // with the symbol table names
+  std::unordered_map<uint64_t, BinaryFunction *> OverriddenBinaryFunctions;
+
   // These maps are for temporary use of warning diagnosis.
   DenseSet<int64_t> AddrsWithMultipleSymbols;
   DenseSet<std::pair<uint64_t, uint64_t>> AddrsWithInvalidInstruction;

>From 8927a273ee063fe6446e0a6e597aebb51f895352 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Sat, 15 Nov 2025 22:14:21 -0800
Subject: [PATCH 17/24] Fixup GuidFilter && Pseudo probe callee mismatch

---
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 14 ++++++++++++--
 llvm/tools/llvm-profgen/ProfileGenerator.h   |  2 +-
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   |  8 +++++++-
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 3b875c5de3c09..075250c4b1ba8 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -723,7 +723,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
 }
 
 StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName) {
   // Get the function range by branch target if it's a call branch.
   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
 
@@ -732,6 +732,16 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
   if (!FRange || !FRange->IsFuncEntry)
     return StringRef();
 
+  if (RestoreSymbolName && FRange->Func->FromSymtab) {
+    const AddressProbesMap &Address2ProbesMap =
+        Binary->getAddress2ProbesMap();
+    for (const MCDecodedPseudoProbe &Probe :
+         Address2ProbesMap.find(TargetAddress)) {
+      if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
+        return FunctionSamples::getCanonicalFnName(ProbeDesc->FuncName);
+    }
+  }
+
   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
 }
 
@@ -1352,7 +1362,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
     FunctionProfile.addTotalSamples(Count);
-    StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
+    StringRef CalleeName = getCalleeNameForAddress(TargetAddress, true);
     if (CalleeName.size() == 0)
       continue;
     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index a4b738016ec3a..3cf13e2618460 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,7 +109,7 @@ class ProfileGeneratorBase {
 
   bool filterAmbiguousProfile(FunctionSamples &FS);
 
-  StringRef getCalleeNameForAddress(uint64_t TargetAddress);
+  StringRef getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName = false);
 
   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 52cb477848b68..893ad8607a7ec 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -473,8 +473,12 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
       GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
       for (auto &Range : F->Ranges) {
         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
-        for (const auto &[StartAddr, Func] : make_range(GUIDs))
+        for (const auto &[StartAddr, Func] : make_range(GUIDs)) {
           FuncStartAddresses[Func] = StartAddr;
+          // Function name may be changed when symbol table is loaded. Adding
+          // back the original GUID if possible
+          GuidFilter.insert(Func);
+        }
       }
     }
   }
@@ -865,9 +869,11 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
     const uint64_t EndAddr = StartAddr + Size;
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
+    assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress());
 
     auto Range = findFuncRange(StartAddr);
     if (!Range) {
+      assert(findFuncRange(EndAddr - 1) == nullptr);
       // Function from symbol table not found previously in DWARF, store ranges.
       auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
       auto &Func = Ret.first->second;

>From 108bc08571d573dddaf6b79f084a5fdd7cc8040a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 14:15:52 -0800
Subject: [PATCH 18/24] format

---
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 6 +++---
 llvm/tools/llvm-profgen/ProfileGenerator.h   | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 075250c4b1ba8..efc6b6a0595a0 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -723,7 +723,8 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
 }
 
 StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
+                                              bool RestoreSymbolName) {
   // Get the function range by branch target if it's a call branch.
   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
 
@@ -733,8 +734,7 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress, bool Resto
     return StringRef();
 
   if (RestoreSymbolName && FRange->Func->FromSymtab) {
-    const AddressProbesMap &Address2ProbesMap =
-        Binary->getAddress2ProbesMap();
+    const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
     for (const MCDecodedPseudoProbe &Probe :
          Address2ProbesMap.find(TargetAddress)) {
       if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 3cf13e2618460..9c347bb78dae1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,7 +109,8 @@ class ProfileGeneratorBase {
 
   bool filterAmbiguousProfile(FunctionSamples &FS);
 
-  StringRef getCalleeNameForAddress(uint64_t TargetAddress, bool RestoreSymbolName = false);
+  StringRef getCalleeNameForAddress(uint64_t TargetAddress,
+                                    bool RestoreSymbolName = false);
 
   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
 

>From 6e3a5aca55d1cd6c93c7e42988d4ee0201355f89 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 15:27:46 -0800
Subject: [PATCH 19/24] Clean up fixup logic in pseudo probe

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 21 +++++++--------------
 llvm/tools/llvm-profgen/ProfiledBinary.h   |  5 +++--
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 893ad8607a7ec..113a7ab818c24 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -471,14 +471,15 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
   } else {
     for (auto *F : ProfiledFunctions) {
       GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
+      // Function name may be changed when symbol table is loaded. Adding
+      // back the original GUID if possible
+      auto OldGuid = OverriddenBinaryFunctions.find(F);
+      if (OldGuid != OverriddenBinaryFunctions.end())
+        GuidFilter.insert(OldGuid->second);
       for (auto &Range : F->Ranges) {
         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
-        for (const auto &[StartAddr, Func] : make_range(GUIDs)) {
+        for (const auto &[StartAddr, Func] : make_range(GUIDs))
           FuncStartAddresses[Func] = StartAddr;
-          // Function name may be changed when symbol table is loaded. Adding
-          // back the original GUID if possible
-          GuidFilter.insert(Func);
-        }
       }
     }
   }
@@ -832,14 +833,6 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
     SymbolStartAddrs[GUID] = Addr;
     StartAddrToSymMap.emplace(Addr, GUID);
   }
-
-  // Load DWARF name too if they are overwritten by the symbol table
-  for (auto [OldGUID, Func] : OverriddenBinaryFunctions) {
-    uint64_t GUID = Function::getGUIDAssumingExternalLinkage(Func->FuncName);
-    uint64_t Addr = SymbolStartAddrs[GUID];
-    SymbolStartAddrs[OldGUID] = Addr;
-    StartAddrToSymMap.emplace(Addr, OldGUID);
-  }
 }
 
 void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
@@ -926,7 +919,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
       HashBinaryFunctions.erase(OldGUID);
       BinaryFunctions.erase(ErrSym);
 
-      OverriddenBinaryFunctions[OldGUID] = &Func;
+      OverriddenBinaryFunctions[&Func] = OldGUID;
       HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
       Range->Func = &Func;
       for (auto [RangeStart, _] : Func.Ranges) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 8858300678343..4268bd74e41fc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -232,9 +232,10 @@ class ProfiledBinary {
   // GUID to symbol start address map
   DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
 
-  // GUID mapping of the overridden DWARF symbol names to the binary functions
+  // GUID mapping of the overridden DWARF symbol names by the binary functions
   // with the symbol table names
-  std::unordered_map<uint64_t, BinaryFunction *> OverriddenBinaryFunctions;
+  std::unordered_map<const BinaryFunction *, uint64_t>
+      OverriddenBinaryFunctions;
 
   // These maps are for temporary use of warning diagnosis.
   DenseSet<int64_t> AddrsWithMultipleSymbols;

>From 3d8ad533c10b1b5a39479f3e23c990f11886c3cc Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 17 Nov 2025 17:15:38 -0800
Subject: [PATCH 20/24] Further cleanup and add more GuidFilters

---
 llvm/tools/llvm-profgen/PerfReader.cpp       |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp |  2 +-
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   | 44 +++++---------------
 llvm/tools/llvm-profgen/ProfiledBinary.h     | 10 ++---
 4 files changed, 18 insertions(+), 40 deletions(-)

diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index a8a9c6eda85fd..b827ebfe261f3 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1310,7 +1310,7 @@ void PerfScriptReader::warnInvalidRange() {
       continue;
     }
 
-    if (FRange->Func->FromSymtab)
+    if (FRange->Func->HasSymtabName)
       RecoveredRange += I.second;
 
     if (EndAddress >= FRange->EndAddress) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index efc6b6a0595a0..4a5d3d4aff3cb 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -733,7 +733,7 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
   if (!FRange || !FRange->IsFuncEntry)
     return StringRef();
 
-  if (RestoreSymbolName && FRange->Func->FromSymtab) {
+  if (RestoreSymbolName && FRange->Func->HasSymtabName) {
     const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
     for (const MCDecodedPseudoProbe &Probe :
          Address2ProbesMap.find(TargetAddress)) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 113a7ab818c24..7d5c3cc2ff148 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -471,11 +471,11 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
   } else {
     for (auto *F : ProfiledFunctions) {
       GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
-      // Function name may be changed when symbol table is loaded. Adding
-      // back the original GUID if possible
-      auto OldGuid = OverriddenBinaryFunctions.find(F);
-      if (OldGuid != OverriddenBinaryFunctions.end())
-        GuidFilter.insert(OldGuid->second);
+      // Function may have different names in symbol table. Adding
+      // back all the GUIDs if possible
+      auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F);
+      for (const auto &[_, Func] : make_range(AltGUIDs))
+        GuidFilter.insert(Func);
       for (auto &Range : F->Ranges) {
         auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
         for (const auto &[StartAddr, Func] : make_range(GUIDs))
@@ -875,7 +875,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
         HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
       }
 
-      Func.FromSymtab = true;
+      Func.HasSymtabName = true;
       Func.Ranges.emplace_back(StartAddr, EndAddr);
 
       auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
@@ -887,7 +887,8 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
 
     } else if (SymName != Range->getFuncName()) {
       // Function range already found from DWARF, but the symbol name from
-      // symbol table is inconsistent with debug info.
+      // symbol table is inconsistent with debug info. Log this discrepancy and
+      // the alternative function GUID.
       if (ShowDetailedWarning)
         WithColor::warning()
             << "Conflicting name for symbol " << Name << " with range ("
@@ -901,33 +902,10 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
       assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
              "Mismatched function range");
 
-      auto ErrSym = BinaryFunctions.find(Range->getFuncName().str());
-      auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
-      auto &Func = Ret.first->second;
-
-      // Symbol table may contain multiple symbol names of the same starting
-      // address. Only need to pick one from these.
-      if (!Ret.second)
-        continue;
+      Range->Func->HasSymtabName = true;
+      AlternativeFunctionGUIDs.emplace(Range->Func,
+                                       MD5Hash(StringRef(SymName)));
 
-      uint64_t OldGUID = MD5Hash(Range->getFuncName());
-
-      Func.FuncName = Ret.first->first;
-      Func.Ranges = ErrSym->second.Ranges;
-      Func.FromSymtab = true;
-
-      HashBinaryFunctions.erase(OldGUID);
-      BinaryFunctions.erase(ErrSym);
-
-      OverriddenBinaryFunctions[&Func] = OldGUID;
-      HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
-      Range->Func = &Func;
-      for (auto [RangeStart, _] : Func.Ranges) {
-        if (auto FRange = findFuncRangeForStartAddr(RangeStart)) {
-          assert(FRange && "Cannot find function range");
-          FRange->Func = &Func;
-        }
-      }
     } else if (StartAddr != Range->StartAddress &&
                EndAddr != Range->EndAddress) {
       // Function already found in DWARF, but the address range from symbol
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 4268bd74e41fc..afd2fb482b0cd 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -76,7 +76,7 @@ struct BinaryFunction {
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
-  bool FromSymtab = false;
+  bool HasSymtabName = false;
 
   uint64_t getFuncSize() {
     uint64_t Sum = 0;
@@ -232,10 +232,10 @@ class ProfiledBinary {
   // GUID to symbol start address map
   DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
 
-  // GUID mapping of the overridden DWARF symbol names by the binary functions
-  // with the symbol table names
-  std::unordered_map<const BinaryFunction *, uint64_t>
-      OverriddenBinaryFunctions;
+  // Binary function to GUID mapping that stores the alternative names in symbol
+  // table, despite the original name from DWARF info
+  std::unordered_multimap<const BinaryFunction *, uint64_t>
+      AlternativeFunctionGUIDs;
 
   // These maps are for temporary use of warning diagnosis.
   DenseSet<int64_t> AddrsWithMultipleSymbols;

>From 1344cddecb9798f308e8df9491b168ec8fc99939 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Thu, 20 Nov 2025 23:19:10 -0800
Subject: [PATCH 21/24] Infer callee name with pseudo probe names

---
 llvm/include/llvm/MC/MCPseudoProbe.h         |  1 +
 llvm/tools/llvm-profgen/Options.h            |  1 +
 llvm/tools/llvm-profgen/PerfReader.cpp       |  2 +-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp | 23 +++---
 llvm/tools/llvm-profgen/ProfileGenerator.h   |  3 +-
 llvm/tools/llvm-profgen/ProfiledBinary.cpp   | 74 ++++++++++++++++----
 llvm/tools/llvm-profgen/ProfiledBinary.h     | 20 +++++-
 7 files changed, 94 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index ac28e45891df2..fc722378b586a 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -328,6 +328,7 @@ class MCDecodedPseudoProbeInlineTree
 
   // Return false if it's a dummy inline site
   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
+  bool isTopLevelFunc() const { return !isRoot() && Parent->isRoot(); }
   InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
   void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
     Probes = ProbesRef.data();
diff --git a/llvm/tools/llvm-profgen/Options.h b/llvm/tools/llvm-profgen/Options.h
index f94cf9118c06a..b2c941fb01945 100644
--- a/llvm/tools/llvm-profgen/Options.h
+++ b/llvm/tools/llvm-profgen/Options.h
@@ -22,6 +22,7 @@ extern cl::opt<bool> ShowDetailedWarning;
 extern cl::opt<bool> InferMissingFrames;
 extern cl::opt<bool> EnableCSPreInliner;
 extern cl::opt<bool> UseContextCostForPreInliner;
+extern cl::opt<bool> LoadFunctionFromSymbol;
 
 } // end namespace llvm
 
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index b827ebfe261f3..1dc59321fd91f 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -1310,7 +1310,7 @@ void PerfScriptReader::warnInvalidRange() {
       continue;
     }
 
-    if (FRange->Func->HasSymtabName)
+    if (FRange->Func->NameStatus != DwarfNameStatus::Matched)
       RecoveredRange += I.second;
 
     if (EndAddress >= FRange->EndAddress) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 4a5d3d4aff3cb..e39ab6e4ba979 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -503,8 +503,11 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
 void ProfileGenerator::generateProfile() {
   collectProfiledFunctions();
 
-  if (Binary->usePseudoProbes())
+  if (Binary->usePseudoProbes()) {
     Binary->decodePseudoProbe();
+    if (LoadFunctionFromSymbol)
+      Binary->loadSymbolsFromPseudoProbe();
+  }
 
   if (SampleCounters) {
     if (Binary->usePseudoProbes()) {
@@ -723,8 +726,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
 }
 
 StringRef
-ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
-                                              bool RestoreSymbolName) {
+ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
   // Get the function range by branch target if it's a call branch.
   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
 
@@ -733,14 +735,9 @@ ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress,
   if (!FRange || !FRange->IsFuncEntry)
     return StringRef();
 
-  if (RestoreSymbolName && FRange->Func->HasSymtabName) {
-    const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap();
-    for (const MCDecodedPseudoProbe &Probe :
-         Address2ProbesMap.find(TargetAddress)) {
-      if (const auto *ProbeDesc = Binary->getFuncDescForGUID(Probe.getGuid()))
-        return FunctionSamples::getCanonicalFnName(ProbeDesc->FuncName);
-    }
-  }
+  auto FuncName = Binary->findPseudoProbeName(FRange->Func);
+  if (FuncName.size())
+    return FunctionSamples::getCanonicalFnName(FuncName);
 
   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
 }
@@ -929,6 +926,8 @@ void CSProfileGenerator::generateProfile() {
     Binary->decodePseudoProbe();
     if (InferMissingFrames)
       initializeMissingFrameInferrer();
+    if (LoadFunctionFromSymbol)
+      Binary->loadSymbolsFromPseudoProbe();
   }
 
   if (SampleCounters) {
@@ -1362,7 +1361,7 @@ void CSProfileGenerator::populateBoundarySamplesWithProbes(
         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
     FunctionProfile.addTotalSamples(Count);
-    StringRef CalleeName = getCalleeNameForAddress(TargetAddress, true);
+    StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
     if (CalleeName.size() == 0)
       continue;
     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(),
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 9c347bb78dae1..a4b738016ec3a 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -109,8 +109,7 @@ class ProfileGeneratorBase {
 
   bool filterAmbiguousProfile(FunctionSamples &FS);
 
-  StringRef getCalleeNameForAddress(uint64_t TargetAddress,
-                                    bool RestoreSymbolName = false);
+  StringRef getCalleeNameForAddress(uint64_t TargetAddress);
 
   void computeSummaryAndThreshold(SampleProfileMap &ProfileMap);
 
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 7d5c3cc2ff148..2253706cf5736 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -37,6 +37,13 @@ cl::opt<bool> ShowSourceLocations("show-source-locations",
                                   cl::desc("Print source locations."),
                                   cl::cat(ProfGenCategory));
 
+cl::opt<bool>
+    LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
+                           cl::desc("Gather additional binary function info "
+                                    "from symbols (e.g. .symtab) in case "
+                                    "dwarf info is incomplete."),
+                           cl::cat(ProfGenCategory));
+
 static cl::opt<bool>
     ShowCanonicalFnName("show-canonical-fname",
                         cl::desc("Print canonical function name."),
@@ -65,13 +72,6 @@ static cl::list<std::string> DisassembleFunctions(
              "names only. Only work with show-disassembly-only"),
     cl::cat(ProfGenCategory));
 
-static cl::opt<bool>
-    LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true),
-                           cl::desc("Gather additional binary function info "
-                                    "from symbols (e.g. .symtab) in case "
-                                    "dwarf info is incomplete."),
-                           cl::cat(ProfGenCategory));
-
 static cl::opt<bool>
     KernelBinary("kernel",
                  cl::desc("Generate the profile for Linux kernel binary."),
@@ -471,8 +471,10 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
   } else {
     for (auto *F : ProfiledFunctions) {
       GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName));
-      // Function may have different names in symbol table. Adding
-      // back all the GUIDs if possible
+      // DWARF name might be broken when a DWARF32 .debug_str.dwo section
+      // exceeds 4GB. We expect symbol table to contain the correct function
+      // names which match the pseudo probe. Adding back all the GUIDs if
+      // possible.
       auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F);
       for (const auto &[_, Func] : make_range(AltGUIDs))
         GuidFilter.insert(Func);
@@ -862,11 +864,13 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
     const uint64_t EndAddr = StartAddr + Size;
     const StringRef SymName =
         FunctionSamples::getCanonicalFnName(Name, Suffixes);
-    assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress());
+    assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress() &&
+           "Function range is invalid.");
 
     auto Range = findFuncRange(StartAddr);
     if (!Range) {
-      assert(findFuncRange(EndAddr - 1) == nullptr);
+      assert(findFuncRange(EndAddr - 1) == nullptr &&
+             "Function range overlaps with existing functions.");
       // Function from symbol table not found previously in DWARF, store ranges.
       auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
       auto &Func = Ret.first->second;
@@ -875,7 +879,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
         HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
       }
 
-      Func.HasSymtabName = true;
+      Func.NameStatus = DwarfNameStatus::Missing;
       Func.Ranges.emplace_back(StartAddr, EndAddr);
 
       auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange());
@@ -887,7 +891,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
 
     } else if (SymName != Range->getFuncName()) {
       // Function range already found from DWARF, but the symbol name from
-      // symbol table is inconsistent with debug info. Log this discrepaency and
+      // symbol table is inconsistent with debug info. Log this discrepancy and
       // the alternative function GUID.
       if (ShowDetailedWarning)
         WithColor::warning()
@@ -902,7 +906,7 @@ void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) {
       assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
              "Mismatched function range");
 
-      Range->Func->HasSymtabName = true;
+      Range->Func->NameStatus = DwarfNameStatus::Mismatch;
       AlternativeFunctionGUIDs.emplace(Range->Func,
                                        MD5Hash(StringRef(SymName)));
 
@@ -1136,6 +1140,48 @@ void ProfiledBinary::computeInlinedContextSizeForFunc(
   }
 }
 
+void ProfiledBinary::loadSymbolsFromPseudoProbe() {
+  if (!UsePseudoProbes)
+    return;
+
+  const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap();
+  for (auto &[Addr, Range] : StartAddrToFuncRangeMap) {
+    auto Func = Range.Func;
+    if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
+      continue;
+#ifndef NDEBUG
+    if (PseudoProbeNames.count(Func))
+      continue;
+#endif
+    const auto &Probe = Address2ProbesMap.find(Addr).begin();
+    if (Probe != Address2ProbesMap.end()) {
+      const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+          Probe->get().getInlineTreeNode();
+      while (!InlineTreeNode->isTopLevelFunc())
+        InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
+            InlineTreeNode->Parent);
+
+      auto TopLevelProbes = InlineTreeNode->getProbes();
+      auto TopProbe = TopLevelProbes.begin();
+      assert(TopProbe != TopLevelProbes.end() &&
+             TopProbe->getAddress() >= Addr &&
+             "Top level pseudo probe does not match function range");
+
+      const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
+      auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
+      assert((Ret.second || Ret.first->second == ProbeDesc->FuncName) &&
+             "Mismatched pseudo probe names");
+    }
+  }
+}
+
+StringRef ProfiledBinary::findPseudoProbeName(const BinaryFunction *Func) {
+  auto ProbeName = PseudoProbeNames.find(Func);
+  if (ProbeName == PseudoProbeNames.end())
+    return StringRef();
+  return ProbeName->second;
+}
+
 void ProfiledBinary::inferMissingFrames(
     const SmallVectorImpl<uint64_t> &Context,
     SmallVectorImpl<uint64_t> &NewContext) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index afd2fb482b0cd..1a83f8221df11 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -72,11 +72,22 @@ enum SpecialFrameAddr {
 
 using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
 
+enum DwarfNameStatus {
+  // Dwarf name matches with the symbol table (or symbol table just doesn't have
+  // this entry)
+  Matched = 0,
+  // Dwarf name is missing, but we fixed it with the name from symbol table
+  Missing = 1,
+  // Symbol table names this function differently. The GUIDs of those names
+  // are logged in AlternativeFunctionGUIDs.
+  Mismatch = 2,
+};
+
 struct BinaryFunction {
   StringRef FuncName;
   // End of range is an exclusive bound.
   RangesTy Ranges;
-  bool HasSymtabName = false;
+  DwarfNameStatus NameStatus = DwarfNameStatus::Matched;
 
   uint64_t getFuncSize() {
     uint64_t Sum = 0;
@@ -237,6 +248,9 @@ class ProfiledBinary {
   std::unordered_multimap<const BinaryFunction *, uint64_t>
       AlternativeFunctionGUIDs;
 
+  // Mapping of profiled binary function to its pseudo probe name
+  std::unordered_map<const BinaryFunction *, StringRef> PseudoProbeNames;
+
   // These maps are for temporary use of warning diagnosis.
   DenseSet<int64_t> AddrsWithMultipleSymbols;
   DenseSet<std::pair<uint64_t, uint64_t>> AddrsWithInvalidInstruction;
@@ -608,6 +622,10 @@ class ProfiledBinary {
 
   void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
 
+  void loadSymbolsFromPseudoProbe();
+
+  StringRef findPseudoProbeName(const BinaryFunction *Func);
+
   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
     return ProbeDecoder.getCallProbeForAddr(Address);
   }

>From 2978e21cbab14d0f5f1d62e1e481c8b4ce41597f Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 21 Nov 2025 21:30:34 -0800
Subject: [PATCH 22/24] promote eligible entry point functions

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2253706cf5736..e2609ccd7df0b 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -539,7 +539,9 @@ void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
   // Set IsFuncEntry to ture if there is only one range in the function or the
   // RangeSymName from ELF is equal to its DWARF-based function name.
   if (FuncRange->Func->Ranges.size() == 1 ||
-      (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
+      (!FuncRange->IsFuncEntry &&
+       (FuncRange->getFuncName() == RangeSymName ||
+        FuncRange->Func->NameStatus != DwarfNameStatus::Matched)))
     FuncRange->IsFuncEntry = true;
 }
 

>From eaab9dfd3c9baa3fe045a1c58934f03e7b15ed2a Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Fri, 21 Nov 2025 22:14:39 -0800
Subject: [PATCH 23/24] Update pseudo probe search range && range checks

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e2609ccd7df0b..cc601b30bf342 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -1151,14 +1151,10 @@ void ProfiledBinary::loadSymbolsFromPseudoProbe() {
     auto Func = Range.Func;
     if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
       continue;
-#ifndef NDEBUG
-    if (PseudoProbeNames.count(Func))
-      continue;
-#endif
-    const auto &Probe = Address2ProbesMap.find(Addr).begin();
-    if (Probe != Address2ProbesMap.end()) {
+    const auto &Probe = Address2ProbesMap.find(Addr, Range.EndAddress);
+    if (Probe.begin() != Probe.end()) {
       const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
-          Probe->get().getInlineTreeNode();
+          Probe.begin()->get().getInlineTreeNode();
       while (!InlineTreeNode->isTopLevelFunc())
         InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
             InlineTreeNode->Parent);

>From c53e0ae47bda56620eb4876f1a82bd745ec406f9 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Sun, 23 Nov 2025 20:35:53 -0800
Subject: [PATCH 24/24] Iterate through ProfiledFunctions to reduce scope

---
 llvm/tools/llvm-profgen/ProfiledBinary.cpp | 56 ++++++++++++++--------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index cc601b30bf342..6303eb1615854 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -1147,28 +1147,42 @@ void ProfiledBinary::loadSymbolsFromPseudoProbe() {
     return;
 
   const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap();
-  for (auto &[Addr, Range] : StartAddrToFuncRangeMap) {
-    auto Func = Range.Func;
-    if (!Range.IsFuncEntry || Func->NameStatus != DwarfNameStatus::Mismatch)
+  for (auto *Func : ProfiledFunctions) {
+    if (Func->NameStatus != DwarfNameStatus::Mismatch)
       continue;
-    const auto &Probe = Address2ProbesMap.find(Addr, Range.EndAddress);
-    if (Probe.begin() != Probe.end()) {
-      const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
-          Probe.begin()->get().getInlineTreeNode();
-      while (!InlineTreeNode->isTopLevelFunc())
-        InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
-            InlineTreeNode->Parent);
-
-      auto TopLevelProbes = InlineTreeNode->getProbes();
-      auto TopProbe = TopLevelProbes.begin();
-      assert(TopProbe != TopLevelProbes.end() &&
-             TopProbe->getAddress() >= Addr &&
-             "Top level pseudo probe does not match function range");
-
-      const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
-      auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
-      assert((Ret.second || Ret.first->second == ProbeDesc->FuncName) &&
-             "Mismatched pseudo probe names");
+    for (auto &[StartAddr, EndAddr] : Func->Ranges) {
+      auto Range = findFuncRangeForStartAddr(StartAddr);
+      if (!Range->IsFuncEntry)
+        continue;
+      const auto &Probe = Address2ProbesMap.find(StartAddr, EndAddr);
+      if (Probe.begin() != Probe.end()) {
+        const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
+            Probe.begin()->get().getInlineTreeNode();
+        while (!InlineTreeNode->isTopLevelFunc())
+          InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>(
+              InlineTreeNode->Parent);
+
+        auto TopLevelProbes = InlineTreeNode->getProbes();
+        auto TopProbe = TopLevelProbes.begin();
+        assert(TopProbe != TopLevelProbes.end() &&
+               TopProbe->getAddress() >= StartAddr &&
+               TopProbe->getAddress() < EndAddr &&
+               "Top level pseudo probe does not match function range");
+
+        const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid);
+        auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName);
+        if (!Ret.second && Ret.first->second != ProbeDesc->FuncName &&
+            ShowDetailedWarning)
+          WithColor::warning()
+              << "Mismatched pseudo probe names in function " << Func->FuncName
+              << " at range: (" << format("%8" PRIx64, StartAddr) << ", "
+              << format("%8" PRIx64, EndAddr) << "). "
+              << "The previously found pseudo probe name is "
+              << Ret.first->second << " but it conflicts with name "
+              << ProbeDesc->FuncName
+              << " This likely indicates a DWARF error that produces "
+                 "conflicting symbols at the same starting address.\n";
+      }
     }
   }
 }



More information about the llvm-commits mailing list