[llvm] [DebugInfo] Support to get TU for hash from .debug_types.dwo section in DWARF4. (PR #161067)
Liu Ke via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 00:29:37 PDT 2025
https://github.com/Sockke updated https://github.com/llvm/llvm-project/pull/161067
>From a53da27bf27ff6299493917ce510ec7d6c8b21a9 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Fri, 26 Sep 2025 16:49:44 +0800
Subject: [PATCH 1/4] [DebugInfo] Support to get type units for hash from
.debug_types.dwo section in DWARF4
---
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 4 +++-
llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 19 ++++++++++++----
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 22 +++++++++++++------
3 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 964ff8e396660..3e8d7e8b32fc8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,7 +143,9 @@ class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1>
decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
- LLVM_ABI DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+ LLVM_ABI DWARFUnit *
+ getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
+ const DWARFSection *Section = nullptr);
/// Read units from a .debug_info or .debug_types section. Calls made
/// before finishedInfoUnits() are assumed to be for .debug_info sections,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 73df62abaf023..c40042c2958e4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1344,9 +1344,20 @@ void DWARFContext::dump(
DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
- if (const auto *R = TUI.getFromHash(Hash))
- return dyn_cast_or_null<DWARFTypeUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ if (const auto *R = TUI.getFromHash(Hash)) {
+ if (TUI.getVersion() >= 5)
+ return dyn_cast_or_null<DWARFTypeUnit>(
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
+ else {
+ DWARFUnit *TypesUnit = nullptr;
+ getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
+ if (!TypesUnit)
+ TypesUnit =
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_EXT_TYPES, &S);
+ });
+ return dyn_cast_or_null<DWARFTypeUnit>(TypesUnit);
+ }
+ }
return nullptr;
}
return State->getTypeUnitMap(IsDWO).lookup(Hash);
@@ -1358,7 +1369,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
if (const auto &CUI = getCUIndex()) {
if (const auto *R = CUI.getFromHash(Hash))
return dyn_cast_or_null<DWARFCompileUnit>(
- DWOUnits.getUnitForIndexEntry(*R));
+ DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
return nullptr;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ef59c82fc6a01..da0bf03e1ac57 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -161,17 +161,24 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
return nullptr;
}
-DWARFUnit *
-DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
- const auto *CUOff = E.getContribution(DW_SECT_INFO);
+DWARFUnit *DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E,
+ DWARFSectionKind Sec,
+ const DWARFSection *Section) {
+ const auto *CUOff = E.getContribution(Sec);
if (!CUOff)
return nullptr;
uint64_t Offset = CUOff->getOffset();
- auto end = begin() + getNumInfoUnits();
+ auto begin = this->begin();
+ auto end = begin + getNumInfoUnits();
+
+ if (Sec == DW_SECT_EXT_TYPES) {
+ begin = end;
+ end = this->end();
+ }
auto *CU =
- std::upper_bound(begin(), end, CUOff->getOffset(),
+ std::upper_bound(begin, end, CUOff->getOffset(),
[](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
return LHS < RHS->getNextUnitOffset();
});
@@ -181,13 +188,14 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
if (!Parser)
return nullptr;
- auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+ auto U = Parser(Offset, Sec, Section, &E);
if (!U)
return nullptr;
auto *NewCU = U.get();
this->insert(CU, std::move(U));
- ++NumInfoUnits;
+ if (Sec == DW_SECT_INFO)
+ ++NumInfoUnits;
return NewCU;
}
>From 90f8414709bd651e69e3a1c09726cae6ea5a4738 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Tue, 14 Oct 2025 17:47:08 +0800
Subject: [PATCH 2/4] added test file
---
.../X86/type_units_split_dwp_v4.s | 242 ++++++++++++++++++
1 file changed, 242 insertions(+)
create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
new file mode 100644
index 0000000000000..b3773fc4a299b
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -0,0 +1,242 @@
+# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
+# RUN: llvm-dwp -e test.o -o test.dwp
+# RUN: llvm-dwarfdump test.dwp | FileCheck %s
+
+# Generated from:
+#
+# struct t1 { };
+# t1 v1;
+#
+# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.4.split.dwp.s -gdwarf-4
+
+# CHECK: DW_TAG_variable
+# CHECK: DW_AT_type ({{.*}} "t1")
+ .file "test.cpp"
+ .section .debug_types.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .quad -4149699470930386446 # Type Signature
+ .long 30 # Type DIE Offset
+ .byte 1 # Abbrev [1] 0x17:0xe DW_TAG_type_unit
+ .short 33 # DW_AT_language
+ .long 0 # DW_AT_stmt_list
+ .byte 2 # Abbrev [2] 0x1e:0x6 DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 1 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end0:
+ .file 1 "." "test.cpp"
+ .type v1,@object # @v1
+ .bss
+ .globl v1
+v1:
+ .zero 1
+ .size v1, 1
+
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .ascii "\264B" # DW_AT_GNU_pubnames
+ .byte 25 # DW_FORM_flag_present
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .byte 14 # DW_FORM_strp
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .ascii "\263B" # DW_AT_GNU_addr_base
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x19 DW_TAG_compile_unit
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Lskel_string0 # DW_AT_comp_dir
+ # DW_AT_GNU_pubnames
+ .long .Lskel_string1 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .long .Laddr_table_base0 # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Lskel_string0:
+ .asciz "." # string offset=0
+.Lskel_string1:
+ .asciz "test.dwo" # string offset=2
+ .section .debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+ .asciz "v1" # string offset=0
+.Linfo_string1:
+ .asciz "t1" # string offset=3
+.Linfo_string2:
+ .asciz "clang version 22.0.0" # string offset=6
+.Linfo_string3:
+ .asciz "test.cpp" # string offset=27
+.Linfo_string4:
+ .asciz "test.dwo" # string offset=36
+ .section .debug_str_offsets.dwo,"e",@progbits
+ .long 0
+ .long 3
+ .long 6
+ .long 27
+ .long 36
+ .section .debug_info.dwo,"e",@progbits
+ .long .Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
+.Ldebug_info_dwo_start1:
+ .short 4 # DWARF version number
+ .long 0 # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 3 # Abbrev [3] 0xb:0x23 DW_TAG_compile_unit
+ .byte 2 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .byte 3 # DW_AT_name
+ .byte 4 # DW_AT_GNU_dwo_name
+ .quad 1388839634901268525 # DW_AT_GNU_dwo_id
+ .byte 4 # Abbrev [4] 0x19:0xb DW_TAG_variable
+ .byte 0 # DW_AT_name
+ .long 36 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 2 # DW_AT_decl_line
+ .byte 2 # DW_AT_location
+ .byte 251
+ .byte 0
+ .byte 5 # Abbrev [5] 0x24:0x9 DW_TAG_structure_type
+ # DW_AT_declaration
+ .quad -4149699470930386446 # DW_AT_signature
+ .byte 0 # End Of Children Mark
+.Ldebug_info_dwo_end1:
+ .section .debug_abbrev.dwo,"e",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 65 # DW_TAG_type_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 54 # DW_AT_calling_convention
+ .byte 11 # DW_FORM_data1
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\260B" # DW_AT_GNU_dwo_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .ascii "\261B" # DW_AT_GNU_dwo_id
+ .byte 7 # DW_FORM_data8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 52 # DW_TAG_variable
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .ascii "\202>" # DW_FORM_GNU_str_index
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 105 # DW_AT_signature
+ .byte 32 # DW_FORM_ref_sig8
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_line.dwo,"e",@progbits
+.Ltmp0:
+ .long .Ldebug_line_end0-.Ldebug_line_start0 # unit length
+.Ldebug_line_start0:
+ .short 4
+ .long .Lprologue_end0-.Lprologue_start0
+.Lprologue_start0:
+ .byte 1
+ .byte 1
+ .byte 1
+ .byte -5
+ .byte 14
+ .byte 1
+ .byte 0
+ .ascii "test.cpp"
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+.Lprologue_end0:
+.Ldebug_line_end0:
+ .section .debug_addr,"",@progbits
+.Laddr_table_base0:
+ .quad v1
+ .section .debug_gnu_pubnames,"",@progbits
+ .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
+.LpubNames_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 25 # DIE offset
+ .byte 32 # Attributes: VARIABLE, EXTERNAL
+ .asciz "v1" # External Name
+ .long 0 # End Mark
+.LpubNames_end0:
+ .section .debug_gnu_pubtypes,"",@progbits
+ .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
+.LpubTypes_start0:
+ .short 2 # DWARF Version
+ .long .Lcu_begin0 # Offset of Compilation Unit Info
+ .long 36 # Compilation Unit Length
+ .long 36 # DIE offset
+ .byte 16 # Attributes: TYPE, EXTERNAL
+ .asciz "t1" # External Name
+ .long 0 # End Mark
+.LpubTypes_end0:
+ .ident "clang version 22.0.0"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
>From 3ce4c34e501a5c4f547c8de3afdfa69f5bf6cccd Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Wed, 15 Oct 2025 13:51:56 +0800
Subject: [PATCH 3/4] enable the bolt test
---
bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test | 1 -
1 file changed, 1 deletion(-)
diff --git a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
index 8077cc0808238..401da48faa3bd 100644
--- a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
+++ b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
@@ -1,4 +1,3 @@
-# UNSUPPORTED: system-linux
; RUN: rm -rf %t
; RUN: mkdir %t
; RUN: cd %t
>From ba88da15bd99badcd522f0c873ad58bb1e302d33 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Tue, 21 Oct 2025 15:13:29 +0800
Subject: [PATCH 4/4] annotation and format
---
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 2 ++
llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 4 ++--
llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s | 3 +++
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 3e8d7e8b32fc8..8731c1dc7b7b7 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,6 +143,8 @@ class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1>
decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
+ /// Returns the Unit from the .debug_info or .debug_types section by the index
+ /// entry.
LLVM_ABI DWARFUnit *
getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
const DWARFSection *Section = nullptr);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c40042c2958e4..41cea4530b990 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1345,10 +1345,10 @@ DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
DWARFUnitVector &DWOUnits = State->getDWOUnits();
if (const auto &TUI = getTUIndex()) {
if (const auto *R = TUI.getFromHash(Hash)) {
- if (TUI.getVersion() >= 5)
+ if (TUI.getVersion() >= 5) {
return dyn_cast_or_null<DWARFTypeUnit>(
DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
- else {
+ } else {
DWARFUnit *TypesUnit = nullptr;
getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
if (!TypesUnit)
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
index b3773fc4a299b..2c8e2bbcca540 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -1,3 +1,6 @@
+# This test uses TU index for type parsing in dwp and makes sure the DWARF4 type is
+# successfully retrieved.
+
# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
# RUN: llvm-dwp -e test.o -o test.dwp
# RUN: llvm-dwarfdump test.dwp | FileCheck %s
More information about the llvm-commits
mailing list