[Lldb-commits] [lldb] [LLDB][NativePDB] Create functions with mangled name (PR #149701)
via lldb-commits
lldb-commits at lists.llvm.org
Sun Jul 20 04:36:01 PDT 2025
https://github.com/Nerixyz created https://github.com/llvm/llvm-project/pull/149701
Previously, functions created by the NativePDB plugin did not know their mangled name. This was visible when printing a stack trace, where only the plain function name was shown. For https://github.com/llvm/llvm-project/issues/143149, the mangled function name is required to separate different parts.
This PR adds that name if available. It reimplements functionality from Microsoft's PDB reference to find a symbol given an address.
The Clang AST nodes also take in a mangled name, which was previously unset. I don't think this unblocks anything further, because Clang can mangle the function anyway.
>From 5b4886056e49d0821052affab9146928707393b3 Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Sun, 20 Jul 2025 13:25:56 +0200
Subject: [PATCH] [LLDB][NativePDB] Create functions with mangled name
---
.../SymbolFile/NativePDB/PdbAstBuilder.cpp | 21 ++++--
.../Plugins/SymbolFile/NativePDB/PdbUtil.h | 10 +++
.../NativePDB/SymbolFileNativePDB.cpp | 70 ++++++++++++++++++-
.../NativePDB/SymbolFileNativePDB.h | 3 +
.../NativePDB/break-by-function.cpp | 6 +-
.../SymbolFile/NativePDB/break-by-line.cpp | 2 +-
.../SymbolFile/NativePDB/disassembly.cpp | 2 +-
.../SymbolFile/NativePDB/local-variables.cpp | 10 +--
.../NativePDB/stack_unwinding01.cpp | 12 ++--
9 files changed, 112 insertions(+), 24 deletions(-)
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index 702ec5e5c9ea9..79e7de8a75de7 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -38,16 +38,18 @@ struct CreateMethodDecl : public TypeVisitorCallbacks {
TypeIndex func_type_index,
clang::FunctionDecl *&function_decl,
lldb::opaque_compiler_type_t parent_ty,
- llvm::StringRef proc_name, CompilerType func_ct)
+ llvm::StringRef proc_name, ConstString mangled_name,
+ CompilerType func_ct)
: m_index(m_index), m_clang(m_clang), func_type_index(func_type_index),
function_decl(function_decl), parent_ty(parent_ty),
- proc_name(proc_name), func_ct(func_ct) {}
+ proc_name(proc_name), mangled_name(mangled_name), func_ct(func_ct) {}
PdbIndex &m_index;
TypeSystemClang &m_clang;
TypeIndex func_type_index;
clang::FunctionDecl *&function_decl;
lldb::opaque_compiler_type_t parent_ty;
llvm::StringRef proc_name;
+ ConstString mangled_name;
CompilerType func_ct;
llvm::Error visitKnownMember(CVMemberRecord &cvr,
@@ -87,8 +89,8 @@ struct CreateMethodDecl : public TypeVisitorCallbacks {
bool is_artificial = (options & MethodOptions::CompilerGenerated) ==
MethodOptions::CompilerGenerated;
function_decl = m_clang.AddMethodToCXXRecordType(
- parent_ty, proc_name,
- /*mangled_name=*/nullptr, func_ct, /*access=*/access_type,
+ parent_ty, proc_name, mangled_name.GetCString(), func_ct,
+ /*access=*/access_type,
/*is_virtual=*/is_virtual, /*is_static=*/is_static,
/*is_inline=*/false, /*is_explicit=*/false,
/*is_attr_used=*/false, /*is_artificial=*/is_artificial);
@@ -888,6 +890,11 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id,
tag_record = CVTagRecord::create(index.tpi().getType(*eti)).asTag();
}
}
+
+ ConstString mangled_name;
+ if (auto mangled_name_opt = pdb->FindMangledFunctionName(func_id))
+ mangled_name = ConstString(*mangled_name_opt);
+
if (!tag_record.FieldList.isSimple()) {
CVType field_list_cvt = index.tpi().getType(tag_record.FieldList);
FieldListRecord field_list;
@@ -895,15 +902,15 @@ PdbAstBuilder::CreateFunctionDecl(PdbCompilandSymId func_id,
field_list_cvt, field_list))
llvm::consumeError(std::move(error));
CreateMethodDecl process(index, m_clang, func_ti, function_decl,
- parent_opaque_ty, func_name, func_ct);
+ parent_opaque_ty, func_name, mangled_name,
+ func_ct);
if (llvm::Error err = visitMemberRecordStream(field_list.Data, process))
llvm::consumeError(std::move(err));
}
if (!function_decl) {
function_decl = m_clang.AddMethodToCXXRecordType(
- parent_opaque_ty, func_name,
- /*mangled_name=*/nullptr, func_ct,
+ parent_opaque_ty, func_name, mangled_name.GetCString(), func_ct,
/*access=*/lldb::AccessType::eAccessPublic,
/*is_virtual=*/false, /*is_static=*/false,
/*is_inline=*/false, /*is_explicit=*/false,
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
index 1f888f4de1fed..d910b6e8ad28e 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
@@ -89,6 +89,16 @@ struct SegmentOffset {
SegmentOffset(uint16_t s, uint32_t o) : segment(s), offset(o) {}
uint16_t segment = 0;
uint32_t offset = 0;
+
+  /// Two addresses are equal when both their segment and their offset match.
+  bool operator==(SegmentOffset rhs) const {
+    return offset == rhs.offset && segment == rhs.segment;
+  }
+
+  /// Orders addresses by segment first; the offset only breaks ties between
+  /// addresses in the same segment.
+  bool operator<(SegmentOffset rhs) const {
+    if (segment != rhs.segment)
+      return segment < rhs.segment;
+    return offset < rhs.offset;
+  }
};
struct SegmentOffsetLength {
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 20d8c1acf9c42..ca4718b7e0261 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -38,6 +38,7 @@
#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDB.h"
@@ -496,7 +497,9 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id,
return nullptr;
PdbTypeSymId sig_id(proc.FunctionType, false);
- Mangled mangled(proc.Name);
+ auto mangled_opt =
+ FindMangledSymbol(SegmentOffset(proc.Segment, proc.CodeOffset));
+ Mangled mangled(mangled_opt.value_or(proc.Name));
FunctionSP func_sp = std::make_shared<Function>(
&comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled,
func_type.get(), func_addr,
@@ -2353,3 +2356,68 @@ SymbolFileNativePDB::GetParentType(llvm::codeview::TypeIndex ti) {
return std::nullopt;
return parent_iter->second;
}
+
+/// Look up the mangled name of the function symbol identified by \a func_id.
+///
+/// The symbol is read from the debug stream of the compiland with index
+/// \a func_id.modi at \a func_id.offset. Only S_LPROC32 and S_GPROC32
+/// records are handled; the record's segment and code offset are then passed
+/// to FindMangledSymbol.
+///
+/// \param[in] func_id The compiland index and symbol offset of the function.
+/// \return The result of FindMangledSymbol for the function's address, or an
+///         empty optional if the compiland is unknown or the symbol is not
+///         one of the handled procedure records.
+std::optional<llvm::StringRef>
+SymbolFileNativePDB::FindMangledFunctionName(PdbCompilandSymId func_id) {
+  const CompilandIndexItem *compiland =
+      m_index->compilands().GetCompiland(func_id.modi);
+  if (compiland == nullptr)
+    return std::nullopt;
+
+  CVSymbol record =
+      compiland->m_debug_stream.readSymbolAtOffset(func_id.offset);
+  const auto record_kind = record.kind();
+  const bool is_proc = record_kind == S_LPROC32 || record_kind == S_GPROC32;
+  if (!is_proc)
+    return std::nullopt;
+
+  ProcSym proc_sym(static_cast<SymbolRecordKind>(record_kind));
+  cantFail(SymbolDeserializer::deserializeAs<ProcSym>(record, proc_sym));
+  const SegmentOffset address(proc_sym.Segment, proc_sym.CodeOffset);
+  return FindMangledSymbol(address);
+}
+
+/// Find the mangled name of a function at \a so.
+///
+/// This is similar to the NearestSym function from Microsoft's PDB reference:
+/// https://github.com/microsoft/microsoft-pdb/blob/805655a28bd8198004be2ac27e6e0290121a5e89/PDB/dbi/gsi.cpp#L1492-L1581
+/// The main difference is that we search for the exact symbol.
+///
+/// \param[in] so The address of the function given by its segment and code
+///               offset.
+/// \return The mangled function name if found. Otherwise an empty optional.
+std::optional<llvm::StringRef>
+SymbolFileNativePDB::FindMangledSymbol(SegmentOffset so) {
+  // The address map is sorted by address, so we do binary search.
+  // Each element is an offset into the symbols for a public symbol.
+  auto lo = m_index->publics().getAddressMap().begin();
+  auto hi = m_index->publics().getAddressMap().end();
+  // Without any public symbols there is nothing to find. Bail out before
+  // stepping `hi` back, which would otherwise move it in front of `lo` and
+  // make the final lookup below dereference the end iterator.
+  if (lo == hi)
+    return std::nullopt;
+  hi -= 1;
+
+  // Reads the record at the given offset into the symbol records. Anything
+  // other than S_PUB32 most likely means the debug info is corrupted.
+  auto read_public = [&](auto record_offset) -> std::optional<PublicSym32> {
+    auto sym = m_index->symrecords().readRecord(record_offset);
+    if (sym.kind() != S_PUB32)
+      return std::nullopt;
+    return llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
+  };
+
+  // Narrow the inclusive range [lo, hi] down to a single candidate. The
+  // midpoint is rounded up so that `lo = tgt` always makes progress.
+  while (lo < hi) {
+    auto tgt = lo + ((hi - lo + 1) / 2);
+    std::optional<PublicSym32> psym = read_public(tgt->value());
+    if (!psym)
+      return std::nullopt;
+
+    SegmentOffset cur(psym->Segment, psym->Offset);
+    if (so == cur)
+      return psym->Name;
+    if (so < cur) {
+      tgt -= 1;
+      hi = tgt;
+    } else {
+      lo = tgt;
+    }
+  }
+
+  // We might've found something, check if it's the symbol we're searching for
+  std::optional<PublicSym32> psym = read_public(lo->value());
+  if (!psym || !(so == SegmentOffset(psym->Segment, psym->Offset)))
+    return std::nullopt;
+  return psym->Name;
+}
+
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
index 9891313f11d0b..abb70600447c1 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
@@ -140,6 +140,9 @@ class SymbolFileNativePDB : public SymbolFileCommon {
std::optional<PdbCompilandSymId> FindSymbolScope(PdbCompilandSymId id);
+ std::optional<llvm::StringRef> FindMangledFunctionName(PdbCompilandSymId id);
+ std::optional<llvm::StringRef> FindMangledSymbol(SegmentOffset so);
+
void FindTypes(const lldb_private::TypeQuery &match,
lldb_private::TypeResults &results) override;
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
index a580d574a9ca3..d4499373bb860 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-function.cpp
@@ -50,9 +50,9 @@ int main(int argc, char **argv) {
// CHECK: 1: name = 'main', locations = 1
// CHECK: 1.1: where = break-by-function.cpp.tmp.exe`main + {{[0-9]+}}
// CHECK: 2: name = 'OvlGlobalFn', locations = 3
-// CHECK: 2.1: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}}
-// CHECK: 2.2: where = break-by-function.cpp.tmp.exe`OvlGlobalFn
-// CHECK: 2.3: where = break-by-function.cpp.tmp.exe`OvlGlobalFn + {{[0-9]+}}
+// CHECK: 2.1: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int) + {{[0-9]+}}
+// CHECK: 2.2: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int)
+// CHECK: 2.3: where = break-by-function.cpp.tmp.exe`int OvlGlobalFn(int, int, int) + {{[0-9]+}}
// CHECK: 3: name = 'StaticFn', locations = 1
// CHECK: 3.1: where = break-by-function.cpp.tmp.exe`StaticFn + {{[0-9]+}}
// CHECK: 4: name = 'DoesntExist', locations = 0 (pending)
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
index 90ac633b01632..3d7de3275ed65 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/break-by-line.cpp
@@ -24,4 +24,4 @@ int main(int argc, char **argv) {
// CHECK: (lldb) target create "{{.*}}break-by-line.cpp.tmp.exe"
// CHECK: Current executable set to '{{.*}}break-by-line.cpp.tmp.exe'
// CHECK: (lldb) break set -f break-by-line.cpp -l 15
-// CHECK: Breakpoint 1: where = break-by-line.cpp.tmp.exe`NS::NamespaceFn + {{[0-9]+}} at break-by-line.cpp:15
+// CHECK: Breakpoint 1: where = break-by-line.cpp.tmp.exe`int NS::NamespaceFn(int) + {{[0-9]+}} at break-by-line.cpp:15
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
index db3b85fa7e59f..3603db80ba8a7 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
@@ -27,7 +27,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+12>: mov qword ptr [rsp + 0x28], rdx
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov dword ptr [rsp + 0x24], ecx
// CHECK: ** 15 foo();
-// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; foo at disassembly.cpp:12
+// CHECK: disassembly.cpp.tmp.exe[{{.*}}] <+21>: call {{.*}} ; int foo(void) at disassembly.cpp:12
// CHECK: ** 16 return 0;
// CHECK-NEXT: 17 }
// CHECK-NEXT: 18
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
index 44a8dc14c6158..f44a5b9dd56e2 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/local-variables.cpp
@@ -55,7 +55,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: (lldb) step
// CHECK-NEXT: Process {{.*}} stopped
// CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
// CHECK-NEXT: 6
// CHECK-NEXT: 7
// CHECK-NEXT: 8 int Function(int Param1, char Param2) {
@@ -71,7 +71,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: (lldb) step
// CHECK-NEXT: Process {{.*}} stopped
// CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
// CHECK-NEXT: 7
// CHECK-NEXT: 8 int Function(int Param1, char Param2) {
// CHECK-NEXT: 9 unsigned Local1 = Param1 + 1;
@@ -89,7 +89,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: (lldb) step
// CHECK-NEXT: Process {{.*}} stopped
// CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
// CHECK-NEXT: 8 int Function(int Param1, char Param2) {
// CHECK-NEXT: 9 unsigned Local1 = Param1 + 1;
// CHECK-NEXT: 10 char Local2 = Param2 + 1;
@@ -109,7 +109,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: (lldb) step
// CHECK-NEXT: Process {{.*}} stopped
// CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
// CHECK-NEXT: 9 unsigned Local1 = Param1 + 1;
// CHECK-NEXT: 10 char Local2 = Param2 + 1;
// CHECK-NEXT: 11 ++Local1;
@@ -129,7 +129,7 @@ int main(int argc, char **argv) {
// CHECK-NEXT: (lldb) step
// CHECK-NEXT: Process {{.*}} stopped
// CHECK-NEXT: * thread #1, stop reason = step in
-// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
+// CHECK-NEXT: frame #0: {{.*}} local-variables.cpp.tmp.exe`int Function(Param1=16, Param2='a') at local-variables.cpp:{{.*}}
// CHECK-NEXT: 10 char Local2 = Param2 + 1;
// CHECK-NEXT: 11 ++Local1;
// CHECK-NEXT: 12 ++Local2;
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
index 596a826f4a11b..87eeebe7aa1b6 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/stack_unwinding01.cpp
@@ -24,19 +24,19 @@ int main(int argc, char **argv) {
// CHECK: (lldb) thread backtrace
// CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20
// CHECK: (lldb) thread backtrace
// CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
// CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20
// CHECK: (lldb) thread backtrace
// CHECK-NEXT: * thread #1, stop reason = breakpoint 1.1
-// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
-// CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: * frame #0: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=4, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: frame #1: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=3, b=2) at stack_unwinding01.cpp:12
+// CHECK-NEXT: frame #2: {{.*}} stack_unwinding01.cpp.tmp.exe`void Struct::simple_method(this={{.*}}, a=2, b=2) at stack_unwinding01.cpp:12
// CHECK-NEXT: frame #3: {{.*}} stack_unwinding01.cpp.tmp.exe`main(argc={{.*}}, argv={{.*}}) at stack_unwinding01.cpp:20
More information about the lldb-commits
mailing list