[Lldb-commits] [lldb] [LLDB][NativePDB] Create functions with mangled name (PR #149701)

via lldb-commits lldb-commits at lists.llvm.org
Sun Jul 20 04:36:01 PDT 2025


https://github.com/Nerixyz created https://github.com/llvm/llvm-project/pull/149701

Before, functions created using the NativePDB plugin would not know about their mangled name. This showed when printing a stacktrace. There, only the function name was shown. For https://github.com/llvm/llvm-project/issues/143149, the mangled function name is required to separate different parts.

This PR adds that name if available. It reimplements functionality from Microsoft's PDB reference to find a symbol given an address.

The Clang AST nodes also take in a mangled name, which was previously unset. I don't think this unblocks anything further, because Clang can mangle the function anyway.

>From 5b4886056e49d0821052affab9146928707393b3 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Sun, 20 Jul 2025 13:25:56 +0200
Subject: [PATCH] [LLDB][NativePDB] Create functions with mangled name

---
 .../SymbolFile/NativePDB/PdbAstBuilder.cpp    | 21 ++++--
 .../Plugins/SymbolFile/NativePDB/PdbUtil.h    | 10 +++
 .../NativePDB/SymbolFileNativePDB.cpp         | 70 ++++++++++++++++++-
 .../NativePDB/SymbolFileNativePDB.h           |  3 +
 .../NativePDB/break-by-function.cpp           |  6 +-
 .../SymbolFile/NativePDB/break-by-line.cpp    |  2 +-
 .../SymbolFile/NativePDB/disassembly.cpp      |  2 +-
 .../SymbolFile/NativePDB/local-variables.cpp  | 10 +--
 .../NativePDB/stack_unwinding01.cpp           | 12 ++--
 9 files changed, 112 insertions(+), 24 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index 702ec5e5c9ea9..79e7de8a75de7 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -38,16 +38,18 @@ struct CreateMethodDecl : public TypeVisitorCallbacks {
                    TypeIndex func_type_index,
                    clang::FunctionDecl *&function_decl,
                    lldb::opaque_compiler_type_t parent_ty,
-                   llvm::StringRef proc_name, CompilerType func_ct)
+                   llvm::StringRef proc_name, ConstString mangled_name,
+                   CompilerType func_ct)
       : m_index(m_index), m_clang(m_clang), func_type_index(func_type_index),
         function_decl(function_decl), parent_ty(parent_ty),
-        proc_name(proc_name), func_ct(func_ct) {}
+        proc_name(proc_name), mangled_name(mangled_name), func_ct(func_ct) {}
   PdbIndex &m_index;
   TypeSystemClang &m_clang;
   TypeIndex func_type_index;
   clang::FunctionDecl *&function_decl;
   lldb::opaque_compiler_type_t parent_ty;
   llvm::StringRef proc_name;
+  ConstString mangled_name;
   CompilerType func_ct;
 
   llvm::Error visitKnownMember(CVMemberRecord &cvr,
@@ -87,8 +89,8 @@ struct CreateMethodDecl : public TypeVisitorCallbacks {
     bool is_artificial = (options & MethodOptions::CompilerGenerated) ==
                          MethodOptions::CompilerGenerated;
     function_decl = m_clang.AddMethodToCXXRecordType(
-        parent_ty, proc_name,
-        /*mangled_name=*/nullptr, func_ct, /*access=*/access_type,
+        parent_ty, proc_name, mangled_name.GetCString(), func_ct,
+        /*access=*/access_type,
         /*is_virtual=*/is_virtual, /*is_static=*/is_static,
         /*is_inline=*/false, /*is_explicit=*/false,
         /*is_attr_used=*/false, /*is_artificial=*/is_artificial);
@@ -888,6 +890,11 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id,
         tag_record = CVTagRecord::create(index.tpi().getType(*eti)).asTag();
       }
     }
+
+    ConstString mangled_name;
+    if (auto mangled_name_opt = pdb->FindMangledFunctionName(func_id))
+      mangled_name = ConstString(*mangled_name_opt);
+
     if (!tag_record.FieldList.isSimple()) {
       CVType field_list_cvt = index.tpi().getType(tag_record.FieldList);
       FieldListRecord field_list;
@@ -895,15 +902,15 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id,
               field_list_cvt, field_list))
         llvm::consumeError(std::move(error));
       CreateMethodDecl process(index, m_clang, func_ti, function_decl,
-                               parent_opaque_ty, func_name, func_ct);
+                               parent_opaque_ty, func_name, mangled_name,
+                               func_ct);
       if (llvm::Error err = visitMemberRecordStream(field_list.Data, process))
         llvm::consumeError(std::move(err));
     }
 
     if (!function_decl) {
       function_decl = m_clang.AddMethodToCXXRecordType(
-          parent_opaque_ty, func_name,
-          /*mangled_name=*/nullptr, func_ct,
+          parent_opaque_ty, func_name, mangled_name.GetCString(), func_ct,
           /*access=*/lldb::AccessType::eAccessPublic,
           /*is_virtual=*/false, /*is_static=*/false,
           /*is_inline=*/false, /*is_explicit=*/false,
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
index 1f888f4de1fed..d910b6e8ad28e 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
@@ -89,6 +89,16 @@ struct SegmentOffset {
   SegmentOffset(uint16_t s, uint32_t o) : segment(s), offset(o) {}
   uint16_t segment = 0;
   uint32_t offset = 0;
+
+  bool operator==(SegmentOffset rhs) const {
+    return segment == rhs.segment && offset == rhs.offset;
+  }
+
+  bool operator<(SegmentOffset rhs) const {
+    if (segment == rhs.segment)
+      return offset < rhs.offset;
+    return segment < rhs.segment;
+  }
 };
 
 struct SegmentOffsetLength {
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 20d8c1acf9c42..ca4718b7e0261 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -38,6 +38,7 @@
 #include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
 #include "llvm/DebugInfo/PDB/PDB.h"
@@ -496,7 +497,9 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id,
     return nullptr;
 
   PdbTypeSymId sig_id(proc.FunctionType, false);
-  Mangled mangled(proc.Name);
+  auto mangled_opt =
+      FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset));
+  Mangled mangled(mangled_opt.value_or(proc.Name));
   FunctionSP func_sp = std::make_shared<Function>(
       &comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled,
       func_type.get(), func_addr,
@@ -2353,3 +2356,68 @@ SymbolFileNativePDB::GetParentType(llvm::codeview::TypeIndex ti) {
     return std::nullopt;
   return parent_iter->second;
 }
+
+std::optional<llvm::StringRef>
+SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) {
+  const CompilandIndexItem *cci =
+      m_index->compilands().GetCompiland(func_id.modi);
+  if (!cci)
+    return std::nullopt;
+
+  CVSymbol sym_record = cci->m_debug_stream.readSymbolAtOffset(func_id.offset);
+  if (sym_record.kind() != S_LPROC32 && sym_record.kind() != S_GPROC32)
+    return std::nullopt;
+
+  ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind()));
+  cantFail(SymbolDeserializer::deserializeAs<ProcSym>(sym_record, proc));
+  return FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset));
+}
+
+/// Find the mangled name of a function at \a so.
+///
+/// This is similar to the NearestSym function from Microsoft's PDB reference:
+/// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
+/// The main difference is that we search for the exact symbol.
+///
+/// \param so[in] The address of the function given by its segment and code
+/// offset.
+/// \return The mangled function name if found. Otherwise an empty optional.
+std::optional<llvm::StringRef>
+SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) {
+  // The address map is sorted by address, so we do binary search.
+  // Each element is an offset into the symbols for a public symbol.
+  auto lo = m_index->publics().getAddressMap().begin();
+  auto hi = m_index->publics().getAddressMap().end();
+  hi -= 1;
+
+  while (lo < hi) {
+    auto tgt = lo + ((hi - lo + 1) / 2);
+    auto val = tgt->value();
+    auto sym = m_index->symrecords().readRecord(val);
+    if (sym.kind() != S_PUB32)
+      return std::nullopt; // this is most likely corrupted debug info
+
+    PublicSym32 psym =
+        llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
+    SegmentOffset cur(psym.Segment, psym.Offset);
+    if (so < cur) {
+      tgt -= 1;
+      hi = tgt;
+    } else if (so == cur)
+      return psym.Name;
+    else
+      lo = tgt;
+  }
+
+  // We might've found something, check if it's the symbol we're searching for
+  auto val = lo->value();
+  auto sym = m_index->symrecords().readRecord(val);
+  if (sym.kind() != S_PUB32)
+    return std::nullopt;
+  PublicSym32 psym =
+      llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
+  if (psym.Segment != so.segment || psym.Offset != so.offset)
+    return std::nullopt;
+  return psym.Name;
+}
+
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
index 9891313f11d0b..abb70600447c1 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
@@ -140,6 +140,9 @@ class SymbolFileNativePDB : public SymbolFileCommon {
 
   std::optional<PdbCompilandSymId> FindSymbolScope(PdbCompilandSymId id);
 
+  std::optional<llvm::StringRef> FindMangledFunctionName(PdbCompilandSymId id);
+  std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so);
+
   void FindTypes(const lldb_private::TypeQuery &match,
                  lldb_private::TypeResults &results) override;
 
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
index a580d574a9ca3..d4499373bb860 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
@@ -50,9 +50,9 @@ int main(int argc, char **argv) {
 // CHECK:      1: name = 'main', locations = 1
 // CHECK:        1.1: where = break-by-function.cpp.tmp.exe`main + {{[0-9]+}}
 // CHECK:      2: name = 'OvlGlobalFn', locations = 3
-// CHECK:        2.1: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}}
-// CHECK:        2.2: where = break-by-function.cpp.tmp.exe`OvlGlobalFn
-// CHECK:        2.3: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}}
+// CHECK:        2.1: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int) + {{[0-9]+}}
+// CHECK:        2.2: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int)
+// CHECK:        2.3: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int, int) + {{[0-9]+}}
 // CHECK:      3: name = 'StaticFn', locations = 1
 // CHECK:        3.1: where = break-by-function.cpp.tmp.exe`StaticFn + {{[0-9]+}}
 // CHECK:      4: name = 'DoesntExist', locations = 0 (pending)
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
index 90ac633b01632..3d7de3275ed65 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
@@ -24,4 +24,4 @@ int main(int argc, char **argv) {
 // CHECK:      (lldb) target create "{{.*}}break-by-line.cpp.tmp.exe"
 // CHECK:      Current executable set to '{{.*}}break-by-line.cpp.tmp.exe'
 // CHECK:      (lldb) break set -f break-by-line.cpp -l 15
-// CHECK:      Breakpoint 1: where = break-by-line.cpp.tmp.exe`NS::NamespaceFn + {{[0-9]+}} at break-by-line.cpp:15
+// CHECK:      Breakpoint 1: where = break-by-line.cpp.tmp.exe`int NS::NamespaceFn(int) + {{[0-9]+}} at break-by-line.cpp:15
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
index db3b85fa7e59f..3603db80ba8a7 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
@@ -27,7 +27,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+12>: mov    qword ptr [rsp + 0x28], rdx
 // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov    dword ptr [rsp + 0x24], ecx
 // CHECK:      ** 15     foo();
-// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+21>: call   {{.*}}               ; foo at disassembly.cpp:12
+// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+21>: call   {{.*}}               ; int foo(void) at disassembly.cpp:12
 // CHECK:      ** 16     return 0;
 // CHECK-NEXT:    17   }
 // CHECK-NEXT:    18
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
index 44a8dc14c6158..f44a5b9dd56e2 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
@@ -55,7 +55,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: (lldb) step
 // CHECK-NEXT: Process {{.*}} stopped
 // CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
 // CHECK-NEXT:    6
 // CHECK-NEXT:    7
 // CHECK-NEXT:    8 int Function(int Param1, char Param2) {
@@ -71,7 +71,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: (lldb) step
 // CHECK-NEXT: Process {{.*}} stopped
 // CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
 // CHECK-NEXT:    7
 // CHECK-NEXT:    8    int Function(int Param1, char Param2) {
 // CHECK-NEXT:    9      unsigned Local1 = Param1 + 1;
@@ -89,7 +89,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: (lldb) step
 // CHECK-NEXT: Process {{.*}} stopped
 // CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
 // CHECK-NEXT:    8    int Function(int Param1, char Param2) {
 // CHECK-NEXT:    9      unsigned Local1 = Param1 + 1;
 // CHECK-NEXT:    10     char Local2 = Param2 + 1;
@@ -109,7 +109,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: (lldb) step
 // CHECK-NEXT: Process {{.*}} stopped
 // CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
 // CHECK-NEXT:    9      unsigned Local1 = Param1 + 1;
 // CHECK-NEXT:    10     char Local2 = Param2 + 1;
 // CHECK-NEXT:    11     ++Local1;
@@ -129,7 +129,7 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: (lldb) step
 // CHECK-NEXT: Process {{.*}} stopped
 // CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT:     frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
 // CHECK-NEXT:    10      char Local2 = Param2 + 1;
 // CHECK-NEXT:    11     ++Local1;
 // CHECK-NEXT:    12     ++Local2;
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
index 596a826f4a11b..87eeebe7aa1b6 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
@@ -24,19 +24,19 @@ int main(int argc, char **argv) {
 
 // CHECK: (lldb) thread backtrace
 // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
 // CHECK-NEXT:     frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20
 
 
 // CHECK: (lldb) thread backtrace
 // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT:     frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:     frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
 // CHECK-NEXT:     frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20
 
 // CHECK: (lldb) thread backtrace
 // CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT:     frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT:     frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:   * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:     frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT:     frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
 // CHECK-NEXT:     frame #3: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20



More information about the lldb-commits mailing list