[llvm] [llvm-gsymutil] Replace truncated DWARF names with mangled names from symbol table (PR #184221)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 19:56:52 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-debuginfo
Author: None (thechenli)
<details>
<summary>Changes</summary>
## Summary
- During `GsymCreator::finalize()`, when deduplicating entries with the same address range, check if the DWARF
entry's name is a truncated version of the symbol table's mangled name
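- If the DWARF name matches the function name inside the demangled symbol table name (the whole name, a prefix
followed by `(`, or the part after a `::` scope separator), replace it with the full mangled name before discarding the symbol table entry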
- This allows downstream tools to properly demangle and display full function signatures
## Test plan
### Unit tests
- `TestMangledNameReplacement`: Verifies DWARF name `make_ftype` is replaced with `_Z10make_ftypePci` and line
table is preserved
- `TestMangledNameReplacementNegative`: Verifies no replacement when both names are mangled, or when names are
unrelated
- All 51 GSYM unit tests pass
### Lit test
- `elf-mangled-name-replacement.yaml`: End-to-end test creating an ELF with DWARF + symbol table, converting to
GSYM, and verifying the output
- All 9 applicable GSYM lit tests pass (the remaining 6 are ARM/macOS tests, which are unsupported on x86_64 Linux)
### Manual end-to-end testing
Created ELF binaries with `yaml2obj` containing both DWARF debug info and symbol table entries for the same
function, then converted to GSYM with `llvm-gsymutil --convert` and verified the output with `llvm-gsymutil`
dump.
**Test 1: Name replacement happens when DWARF name is truncated**
- DWARF has function named `make_ftype` with line table at `0x401000`
- Symbol table has `_Z10make_ftypePci` (demangles to `make_ftype(char*, int)`) at same address
- After conversion, GSYM output shows: `"_Z10make_ftypePci"` with line table preserved ✅
**Test 2: No replacement when names are unrelated**
- DWARF has function named `unrelated_func` with line table at `0x401000`
- Symbol table has `_Z10make_ftypePci` at same address
- After conversion, GSYM output shows: `"unrelated_func"` — name unchanged ✅
**Test 3: Replacement works with namespaced functions**
- DWARF has function named `make_ftype` with line table at `0x401000`
- Symbol table has `_ZN12_GLOBAL__N_110make_ftypeEPci` (demangles to `(anonymous namespace)::make_ftype(char*,
int)`) at same address
- After conversion, GSYM output shows: `"_ZN12_GLOBAL__N_110make_ftypeEPci"` with line table preserved ✅
---
Full diff: https://github.com/llvm/llvm-project/pull/184221.diff
5 Files Affected:
- (modified) llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h (+9)
- (modified) llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h (+11)
- (modified) llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (+39)
- (added) llvm/test/tools/llvm-gsymutil/X86/elf-mangled-name-replacement.yaml (+133)
- (modified) llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp (+105)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 74cdd48697024..b81f89215aea6 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -127,6 +127,15 @@ struct FunctionInfo {
return Name != 0;
}
+ /// Overwrite the name of this FunctionInfo with \p NewName.
+ ///
+ /// DWARF debug info may have truncated function names. When the symbol table
+ /// has a longer mangled name, this allows replacing the truncated name with
+ /// the full mangled name.
+ ///
+ /// \param NewName The new string table offset for the function name; it is stored as-is, with no validation.
+ void updateName(uint32_t NewName) { Name = NewName; }
+
/// Decode an object from a binary data stream.
///
/// \param Data The binary stream to read the data from. This object must
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 679c9cc0dd2ee..e58a01dc543bd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -283,6 +283,17 @@ class GsymCreator {
llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
uint64_t SegmentSize) const;
+ /// Check if a short name is the function name of a demangled mangled name.
+ ///
+ /// DWARF may have truncated names (e.g., just "make_ftype") while the
+ /// symbol table has the full mangled name (e.g., "_Z10make_ftypePci"
+ /// which demangles to "make_ftype(char*, int)").
+ ///
+ /// \param MangledName The mangled name from the symbol table.
+ /// \param ShortName The potentially truncated name from DWARF.
+ /// \returns True if the demangled MangledName equals ShortName or contains it at a name boundary (at the start or after "::", and at the end or before "(").
+ bool isSubStrOfMangledName(StringRef MangledName, StringRef ShortName) const;
+
/// Let this creator know that this is a segment of another GsymCreator.
///
/// When we have a segment, we know that function infos will be added in
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index f26e69fda2540..ee66597e42908 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -10,6 +10,7 @@
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"
@@ -312,6 +313,21 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
// address ranges that have debug info are last in
// the sort.
if (!(Prev == Curr)) {
+ // Before discarding the symbol table entry, check if the DWARF
+ // entry has a truncated name that should be replaced with the
+ // full mangled name from the symbol table.
+ if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
+ // Prev is from symbol table, Curr is from DWARF.
+ if (isSubStrOfMangledName(getString(Prev.Name),
+ getString(Curr.Name)))
+ Curr.updateName(Prev.Name);
+ } else if (Prev.hasRichInfo() && !Curr.hasRichInfo()) {
+ // Prev is from DWARF, Curr is from symbol table.
+ if (isSubStrOfMangledName(getString(Curr.Name),
+ getString(Prev.Name)))
+ Prev.updateName(Curr.Name);
+ }
+
if (Prev.hasRichInfo() && Curr.hasRichInfo())
Out.Report(
"Duplicate address ranges with different debug info.",
@@ -625,3 +641,26 @@ GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
}
return std::move(GC);
}
+
+/// Report whether \p Name begins with the "_Z" prefix used by Itanium-style
+/// mangled C++ names.
+static bool isMangled(StringRef Name) { return Name.starts_with("_Z"); }
+
+/// Decide whether \p ShortName is the bare function name of the symbol whose
+/// mangled name is \p MangledName.
+///
+/// The match is made against the demangled form of \p MangledName and is
+/// anchored on both sides: \p ShortName must start at the beginning of the
+/// demangled string or directly follow a "::" scope separator, and it must be
+/// followed by the end of the string or by "(" (the parameter list).
+///
+/// \param MangledName The mangled name from the symbol table.
+/// \param ShortName The potentially truncated name from DWARF.
+/// \returns True if a match is found; false if \p ShortName is empty or is
+/// itself mangled, or if \p MangledName is not mangled.
+bool GsymCreator::isSubStrOfMangledName(StringRef MangledName,
+                                        StringRef ShortName) const {
+  // An empty name carries no information. Without this guard it would match
+  // any demangled name that begins with "(", such as
+  // "(anonymous namespace)::f()".
+  if (ShortName.empty())
+    return false;
+
+  // Only applies when ShortName is NOT mangled and MangledName IS mangled.
+  if (isMangled(ShortName) || !isMangled(MangledName))
+    return false;
+
+  const std::string Demangled = llvm::demangle(MangledName);
+  const StringRef DemangledRef(Demangled);
+
+  // Visit every occurrence of ShortName and test its surroundings in place,
+  // which avoids building temporary "::name(" strings on each call.
+  for (size_t Pos = DemangledRef.find(ShortName); Pos != StringRef::npos;
+       Pos = DemangledRef.find(ShortName, Pos + 1)) {
+    const StringRef Before = DemangledRef.take_front(Pos);
+    const StringRef After = DemangledRef.drop_front(Pos + ShortName.size());
+    // Start of the demangled string, or right after a scope separator.
+    const bool StartsName = Before.empty() || Before.ends_with("::");
+    // End of the demangled string, or right before the parameter list.
+    const bool EndsName = After.empty() || After.starts_with("(");
+    if (StartsName && EndsName)
+      return true;
+  }
+  return false;
+}
diff --git a/llvm/test/tools/llvm-gsymutil/X86/elf-mangled-name-replacement.yaml b/llvm/test/tools/llvm-gsymutil/X86/elf-mangled-name-replacement.yaml
new file mode 100644
index 0000000000000..7e74b37c43132
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/X86/elf-mangled-name-replacement.yaml
@@ -0,0 +1,133 @@
+## Test that during GSYM conversion, truncated DWARF function names are
+## replaced with full mangled names from the symbol table.
+##
+## The DWARF debug info has a function named "make_ftype" (truncated), while
+## the symbol table has the full mangled name "_Z10make_ftypePci" at the same
+## address. After conversion, the GSYM should contain the full mangled name.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-gsymutil --convert %t -o %t.gsym 2>&1 | FileCheck %s --check-prefix=CONVERT
+# RUN: llvm-gsymutil %t.gsym 2>&1 | FileCheck %s --check-prefix=DUMP
+
+# CONVERT: Input file: {{.*\.yaml\.tmp}}
+# CONVERT: Output file (x86_64): {{.*\.yaml\.tmp\.gsym}}
+# CONVERT: Loaded 1 functions from DWARF.
+# CONVERT: Loaded 1 functions from symbol table.
+# CONVERT: Pruned 1 functions, ended with 1 total
+
+## Verify the function has the full mangled name, not the truncated DWARF name.
+# DUMP: "_Z10make_ftypePci"
+# DUMP: LineTable:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x0000000000401000
+ AddressAlign: 0x10
+ Content: 554889E531C05DC3554889E531C05DC3
+DWARF:
+ debug_str:
+ - ''
+ - main.cpp
+ - make_ftype
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Attribute: DW_AT_stmt_list
+ Form: DW_FORM_sec_offset
+ - Code: 0x2
+ Tag: DW_TAG_subprogram
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_addr
+ debug_info:
+ - Length: 0x27
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0x0
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0xA
+ - Value: 0x401000
+ - Value: 0x401010
+ - AbbrCode: 0x0
+ debug_line:
+ - Length: 61
+ Version: 2
+ PrologueLength: 31
+ MinInstLength: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ Files:
+ - Name: main.cpp
+ DirIdx: 0
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 4198400
+ - Opcode: DW_LNS_advance_line
+ SData: 9
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 8
+ - Opcode: DW_LNS_advance_line
+ SData: 1
+ Data: 0
+ - Opcode: DW_LNS_copy
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 8
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+ProgramHeaders:
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ VAddr: 0x0000000000400000
+ Align: 0x1000
+ FirstSec: .text
+ LastSec: .text
+Symbols:
+ - Name: _Z10make_ftypePci
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ Value: 0x0000000000401000
+ Size: 0x0000000000000010
+...
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index d56007371b2f2..9c042a5094934 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -5085,3 +5085,108 @@ TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
"index 4294967295 in its DW_AT_decl_file attribute");
EXPECT_TRUE(errors.find(error_str) == std::string::npos);
}
+
+TEST(GSYMTest, TestMangledNameReplacement) {
+ // Test that during finalize(), when deduplicating entries with the same
+ // address range, if the DWARF entry has a truncated name that is a
+ // substring of the demangled symbol table name, the DWARF entry's name
+ // is replaced with the full mangled name from the symbol table.
+ GsymCreator GC;
+ const auto ByteOrder = llvm::endianness::native;
+ constexpr uint64_t FuncAddr = 0x1000;
+ constexpr uint64_t FuncSize = 0x100;
+
+ // Insert a mangled name (symbol table) and a short name (DWARF).
+ // _Z10make_ftypePci demangles to make_ftype(char*, int)
+ const uint32_t MangledName = GC.insertString("_Z10make_ftypePci");
+ const uint32_t ShortName = GC.insertString("make_ftype");
+
+ // Add a symbol table entry (no line table, no inline info).
+ GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, MangledName));
+
+ // Add a DWARF entry (with line table) using the short name.
+ FunctionInfo DwarfFI(FuncAddr, FuncSize, ShortName);
+ DwarfFI.OptLineTable = LineTable();
+ const uint32_t FileIdx = GC.insertFile("/tmp/main.cpp");
+ DwarfFI.OptLineTable->push(LineEntry(FuncAddr, FileIdx, 10));
+ DwarfFI.OptLineTable->push(LineEntry(FuncAddr + 0x10, FileIdx, 20));
+ GC.addFunctionInfo(std::move(DwarfFI));
+
+ OutputAggregator Null(nullptr);
+ Error FinalizeErr = GC.finalize(Null);
+ ASSERT_FALSE(FinalizeErr);
+
+ // Encode to buffer and create a GsymReader to verify the result.
+ SmallString<512> Str;
+ raw_svector_ostream OutStrm(Str);
+ FileWriter FW(OutStrm, ByteOrder);
+ ASSERT_FALSE(bool(GC.encode(FW)));
+
+ auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str());
+ ASSERT_THAT_EXPECTED(ExpectedGR, Succeeded());
+ const GsymReader &GR = ExpectedGR.get();
+
+ // Look up the function and verify the name is the full mangled name.
+ auto ExpFI = GR.getFunctionInfo(FuncAddr);
+ ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
+ EXPECT_EQ(GR.getString(ExpFI->Name), "_Z10make_ftypePci");
+ // Verify it still has line table info.
+ EXPECT_TRUE(ExpFI->OptLineTable.has_value());
+}
+
+TEST(GSYMTest, TestMangledNameReplacementNegative) {
+ // Test negative cases: no replacement should happen when both names are
+ // mangled, or when the short name is not a substring of the demangled name.
+ GsymCreator GC;
+ const auto ByteOrder = llvm::endianness::native;
+ constexpr uint64_t FuncAddr = 0x2000;
+ constexpr uint64_t FuncSize = 0x100;
+
+ // Case 1: Both names are mangled — no replacement.
+ const uint32_t Mangled1 = GC.insertString("_Z3foov");
+ const uint32_t Mangled2 = GC.insertString("_Z3barv");
+
+ GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, Mangled1));
+ FunctionInfo DwarfFI1(FuncAddr, FuncSize, Mangled2);
+ DwarfFI1.OptLineTable = LineTable();
+ const uint32_t FileIdx = GC.insertFile("/tmp/test.cpp");
+ DwarfFI1.OptLineTable->push(LineEntry(FuncAddr, FileIdx, 5));
+ GC.addFunctionInfo(std::move(DwarfFI1));
+
+ // Case 2: Short name is NOT a substring — no replacement.
+ constexpr uint64_t Func2Addr = 0x3000;
+ // _Z10make_ftypePci demangles to make_ftype(char*, int)
+ const uint32_t MangledName = GC.insertString("_Z10make_ftypePci");
+ const uint32_t UnrelatedName = GC.insertString("some_other_func");
+
+ GC.addFunctionInfo(FunctionInfo(Func2Addr, FuncSize, MangledName));
+ FunctionInfo DwarfFI2(Func2Addr, FuncSize, UnrelatedName);
+ DwarfFI2.OptLineTable = LineTable();
+ DwarfFI2.OptLineTable->push(LineEntry(Func2Addr, FileIdx, 15));
+ GC.addFunctionInfo(std::move(DwarfFI2));
+
+ OutputAggregator Null(nullptr);
+ Error FinalizeErr = GC.finalize(Null);
+ ASSERT_FALSE(FinalizeErr);
+
+ SmallString<512> Str;
+ raw_svector_ostream OutStrm(Str);
+ FileWriter FW(OutStrm, ByteOrder);
+ ASSERT_FALSE(bool(GC.encode(FW)));
+
+ auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str());
+ ASSERT_THAT_EXPECTED(ExpectedGR, Succeeded());
+ const GsymReader &GR = ExpectedGR.get();
+
+ // Case 1: Both mangled — should keep the DWARF entry's name (which sorts
+ // last due to having rich info), not replace it.
+ auto ExpFI1 = GR.getFunctionInfo(FuncAddr);
+ ASSERT_THAT_EXPECTED(ExpFI1, Succeeded());
+ // The DWARF entry with _Z3barv should be kept (it has rich info).
+ EXPECT_EQ(GR.getString(ExpFI1->Name), "_Z3barv");
+
+ // Case 2: Unrelated name — no replacement, keep DWARF entry's name.
+ auto ExpFI2 = GR.getFunctionInfo(Func2Addr);
+ ASSERT_THAT_EXPECTED(ExpFI2, Succeeded());
+ EXPECT_EQ(GR.getString(ExpFI2->Name), "some_other_func");
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/184221
More information about the llvm-commits
mailing list