[llvm] [DebugInfo] Support to get TU for hash from .debug_types.dwo section in DWARF4. (PR #161067)

Liu Ke via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 21 00:43:35 PDT 2025


https://github.com/Sockke updated https://github.com/llvm/llvm-project/pull/161067

>From a53da27bf27ff6299493917ce510ec7d6c8b21a9 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Fri, 26 Sep 2025 16:49:44 +0800
Subject: [PATCH 1/4] [DebugInfo] Support to get type units for hash from
 .debug_types.dwo section in Dwarf4

---
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h |  4 +++-
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp     | 19 ++++++++++++----
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        | 22 +++++++++++++------
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 964ff8e396660..3e8d7e8b32fc8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,7 +143,9 @@ class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1>
       decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
 
   LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
-  LLVM_ABI DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E);
+  LLVM_ABI DWARFUnit *
+  getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
+                       const DWARFSection *Section = nullptr);
 
   /// Read units from a .debug_info or .debug_types section.  Calls made
   /// before finishedInfoUnits() are assumed to be for .debug_info sections,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 73df62abaf023..c40042c2958e4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1344,9 +1344,20 @@ void DWARFContext::dump(
 DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
   DWARFUnitVector &DWOUnits = State->getDWOUnits();
   if (const auto &TUI = getTUIndex()) {
-    if (const auto *R = TUI.getFromHash(Hash))
-      return dyn_cast_or_null<DWARFTypeUnit>(
-          DWOUnits.getUnitForIndexEntry(*R));
+    if (const auto *R = TUI.getFromHash(Hash)) {
+      if (TUI.getVersion() >= 5)
+        return dyn_cast_or_null<DWARFTypeUnit>(
+            DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
+      else {
+        DWARFUnit *TypesUnit = nullptr;
+        getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
+          if (!TypesUnit)
+            TypesUnit =
+                DWOUnits.getUnitForIndexEntry(*R, DW_SECT_EXT_TYPES, &S);
+        });
+        return dyn_cast_or_null<DWARFTypeUnit>(TypesUnit);
+      }
+    }
     return nullptr;
   }
   return State->getTypeUnitMap(IsDWO).lookup(Hash);
@@ -1358,7 +1369,7 @@ DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
   if (const auto &CUI = getCUIndex()) {
     if (const auto *R = CUI.getFromHash(Hash))
       return dyn_cast_or_null<DWARFCompileUnit>(
-          DWOUnits.getUnitForIndexEntry(*R));
+          DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
     return nullptr;
   }
 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ef59c82fc6a01..da0bf03e1ac57 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -161,17 +161,24 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const {
   return nullptr;
 }
 
-DWARFUnit *
-DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
-  const auto *CUOff = E.getContribution(DW_SECT_INFO);
+DWARFUnit *DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E,
+                                                 DWARFSectionKind Sec,
+                                                 const DWARFSection *Section) {
+  const auto *CUOff = E.getContribution(Sec);
   if (!CUOff)
     return nullptr;
 
   uint64_t Offset = CUOff->getOffset();
-  auto end = begin() + getNumInfoUnits();
+  auto begin = this->begin();
+  auto end = begin + getNumInfoUnits();
+
+  if (Sec == DW_SECT_EXT_TYPES) {
+    begin = end;
+    end = this->end();
+  }
 
   auto *CU =
-      std::upper_bound(begin(), end, CUOff->getOffset(),
+      std::upper_bound(begin, end, CUOff->getOffset(),
                        [](uint64_t LHS, const std::unique_ptr<DWARFUnit> &RHS) {
                          return LHS < RHS->getNextUnitOffset();
                        });
@@ -181,13 +188,14 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
   if (!Parser)
     return nullptr;
 
-  auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
+  auto U = Parser(Offset, Sec, Section, &E);
   if (!U)
     return nullptr;
 
   auto *NewCU = U.get();
   this->insert(CU, std::move(U));
-  ++NumInfoUnits;
+  if (Sec == DW_SECT_INFO)
+    ++NumInfoUnits;
   return NewCU;
 }
 

>From 90f8414709bd651e69e3a1c09726cae6ea5a4738 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Tue, 14 Oct 2025 17:47:08 +0800
Subject: [PATCH 2/4] added test file

---
 .../X86/type_units_split_dwp_v4.s             | 242 ++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s

diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
new file mode 100644
index 0000000000000..b3773fc4a299b
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -0,0 +1,242 @@
+# RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
+# RUN: llvm-dwp -e test.o -o test.dwp
+# RUN: llvm-dwarfdump test.dwp | FileCheck %s
+
+# Generated from:
+#
+#   struct t1 { };
+#   t1 v1;
+#
+# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.4.split.dwp.s -gdwarf-4
+
+# CHECK: DW_TAG_variable
+# CHECK:   DW_AT_type ({{.*}} "t1")
+	.file	"test.cpp"
+	.section	.debug_types.dwo,"e",@progbits
+	.long	.Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+	.short	4                               # DWARF version number
+	.long	0                               # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.quad	-4149699470930386446            # Type Signature
+	.long	30                              # Type DIE Offset
+	.byte	1                               # Abbrev [1] 0x17:0xe DW_TAG_type_unit
+	.short	33                              # DW_AT_language
+	.long	0                               # DW_AT_stmt_list
+	.byte	2                               # Abbrev [2] 0x1e:0x6 DW_TAG_structure_type
+	.byte	5                               # DW_AT_calling_convention
+	.byte	1                               # DW_AT_name
+	.byte	1                               # DW_AT_byte_size
+	.byte	1                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_dwo_end0:
+	.file	1 "." "test.cpp"
+	.type	v1,@object                      # @v1
+	.bss
+	.globl	v1
+v1:
+	.zero	1
+	.size	v1, 1
+
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	0                               # DW_CHILDREN_no
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	14                              # DW_FORM_strp
+	.ascii	"\264B"                         # DW_AT_GNU_pubnames
+	.byte	25                              # DW_FORM_flag_present
+	.ascii	"\260B"                         # DW_AT_GNU_dwo_name
+	.byte	14                              # DW_FORM_strp
+	.ascii	"\261B"                         # DW_AT_GNU_dwo_id
+	.byte	7                               # DW_FORM_data8
+	.ascii	"\263B"                         # DW_AT_GNU_addr_base
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	4                               # DWARF version number
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.byte	1                               # Abbrev [1] 0xb:0x19 DW_TAG_compile_unit
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Lskel_string0                  # DW_AT_comp_dir
+                                        # DW_AT_GNU_pubnames
+	.long	.Lskel_string1                  # DW_AT_GNU_dwo_name
+	.quad	1388839634901268525             # DW_AT_GNU_dwo_id
+	.long	.Laddr_table_base0              # DW_AT_GNU_addr_base
+.Ldebug_info_end0:
+	.section	.debug_str,"MS",@progbits,1
+.Lskel_string0:
+	.asciz	"." # string offset=0
+.Lskel_string1:
+	.asciz	"test.dwo"                      # string offset=2
+	.section	.debug_str.dwo,"eMS",@progbits,1
+.Linfo_string0:
+	.asciz	"v1"                            # string offset=0
+.Linfo_string1:
+	.asciz	"t1"                            # string offset=3
+.Linfo_string2:
+	.asciz	"clang version 22.0.0" # string offset=6
+.Linfo_string3:
+	.asciz	"test.cpp"                      # string offset=27
+.Linfo_string4:
+	.asciz	"test.dwo"                      # string offset=36
+	.section	.debug_str_offsets.dwo,"e",@progbits
+	.long	0
+	.long	3
+	.long	6
+	.long	27
+	.long	36
+	.section	.debug_info.dwo,"e",@progbits
+	.long	.Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
+.Ldebug_info_dwo_start1:
+	.short	4                               # DWARF version number
+	.long	0                               # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.byte	3                               # Abbrev [3] 0xb:0x23 DW_TAG_compile_unit
+	.byte	2                               # DW_AT_producer
+	.short	33                              # DW_AT_language
+	.byte	3                               # DW_AT_name
+	.byte	4                               # DW_AT_GNU_dwo_name
+	.quad	1388839634901268525             # DW_AT_GNU_dwo_id
+	.byte	4                               # Abbrev [4] 0x19:0xb DW_TAG_variable
+	.byte	0                               # DW_AT_name
+	.long	36                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	1                               # DW_AT_decl_file
+	.byte	2                               # DW_AT_decl_line
+	.byte	2                               # DW_AT_location
+	.byte	251
+	.byte	0
+	.byte	5                               # Abbrev [5] 0x24:0x9 DW_TAG_structure_type
+                                        # DW_AT_declaration
+	.quad	-4149699470930386446            # DW_AT_signature
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_dwo_end1:
+	.section	.debug_abbrev.dwo,"e",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	65                              # DW_TAG_type_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	54                              # DW_AT_calling_convention
+	.byte	11                              # DW_FORM_data1
+	.byte	3                               # DW_AT_name
+	.ascii	"\202>"                         # DW_FORM_GNU_str_index
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.ascii	"\202>"                         # DW_FORM_GNU_str_index
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.ascii	"\202>"                         # DW_FORM_GNU_str_index
+	.ascii	"\260B"                         # DW_AT_GNU_dwo_name
+	.ascii	"\202>"                         # DW_FORM_GNU_str_index
+	.ascii	"\261B"                         # DW_AT_GNU_dwo_id
+	.byte	7                               # DW_FORM_data8
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	52                              # DW_TAG_variable
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.ascii	"\202>"                         # DW_FORM_GNU_str_index
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	5                               # Abbreviation Code
+	.byte	19                              # DW_TAG_structure_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	60                              # DW_AT_declaration
+	.byte	25                              # DW_FORM_flag_present
+	.byte	105                             # DW_AT_signature
+	.byte	32                              # DW_FORM_ref_sig8
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_line.dwo,"e",@progbits
+.Ltmp0:
+	.long	.Ldebug_line_end0-.Ldebug_line_start0 # unit length
+.Ldebug_line_start0:
+	.short	4
+	.long	.Lprologue_end0-.Lprologue_start0
+.Lprologue_start0:
+	.byte	1
+	.byte	1
+	.byte	1
+	.byte	-5
+	.byte	14
+	.byte	1
+	.byte	0
+	.ascii	"test.cpp"
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+	.byte	0
+.Lprologue_end0:
+.Ldebug_line_end0:
+	.section	.debug_addr,"",@progbits
+.Laddr_table_base0:
+	.quad	v1
+	.section	.debug_gnu_pubnames,"",@progbits
+	.long	.LpubNames_end0-.LpubNames_start0 # Length of Public Names Info
+.LpubNames_start0:
+	.short	2                               # DWARF Version
+	.long	.Lcu_begin0                     # Offset of Compilation Unit Info
+	.long	36                              # Compilation Unit Length
+	.long	25                              # DIE offset
+	.byte	32                              # Attributes: VARIABLE, EXTERNAL
+	.asciz	"v1"                            # External Name
+	.long	0                               # End Mark
+.LpubNames_end0:
+	.section	.debug_gnu_pubtypes,"",@progbits
+	.long	.LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info
+.LpubTypes_start0:
+	.short	2                               # DWARF Version
+	.long	.Lcu_begin0                     # Offset of Compilation Unit Info
+	.long	36                              # Compilation Unit Length
+	.long	36                              # DIE offset
+	.byte	16                              # Attributes: TYPE, EXTERNAL
+	.asciz	"t1"                            # External Name
+	.long	0                               # End Mark
+.LpubTypes_end0:
+	.ident	"clang version 22.0.0"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:

>From 3ce4c34e501a5c4f547c8de3afdfa69f5bf6cccd Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Wed, 15 Oct 2025 13:51:56 +0800
Subject: [PATCH 3/4] enable the bolt test

---
 bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
index 8077cc0808238..401da48faa3bd 100644
--- a/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
+++ b/bolt/test/X86/dwarf4-ftypes-dwp-input-dwo-output.test
@@ -1,4 +1,3 @@
-# UNSUPPORTED: system-linux
 ; RUN: rm -rf %t
 ; RUN: mkdir %t
 ; RUN: cd %t

>From 3d5ec5732e7fc732c21c8d89585c158bbd780ba7 Mon Sep 17 00:00:00 2001
From: "liuke.gehry" <liuke.gehry at bytedance.com>
Date: Tue, 21 Oct 2025 15:13:29 +0800
Subject: [PATCH 4/4] annotation and format

---
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h                | 2 ++
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp                    | 4 ++--
 llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 3e8d7e8b32fc8..8731c1dc7b7b7 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -143,6 +143,8 @@ class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1>
       decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit));
 
   LLVM_ABI DWARFUnit *getUnitForOffset(uint64_t Offset) const;
+  /// Returns the unit in the .debug_info or .debug_types section that the given
+  /// index entry refers to; \p Sec selects which contribution of the entry is used.
   LLVM_ABI DWARFUnit *
   getUnitForIndexEntry(const DWARFUnitIndex::Entry &E, DWARFSectionKind Sec,
                        const DWARFSection *Section = nullptr);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index c40042c2958e4..41cea4530b990 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1345,10 +1345,10 @@ DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint64_t Hash, bool IsDWO) {
   DWARFUnitVector &DWOUnits = State->getDWOUnits();
   if (const auto &TUI = getTUIndex()) {
     if (const auto *R = TUI.getFromHash(Hash)) {
-      if (TUI.getVersion() >= 5)
+      if (TUI.getVersion() >= 5) {
         return dyn_cast_or_null<DWARFTypeUnit>(
             DWOUnits.getUnitForIndexEntry(*R, DW_SECT_INFO));
-      else {
+      } else {
         DWARFUnit *TypesUnit = nullptr;
         getDWARFObj().forEachTypesDWOSections([&](const DWARFSection &S) {
           if (!TypesUnit)
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
index b3773fc4a299b..becd9d1b55693 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/type_units_split_dwp_v4.s
@@ -1,3 +1,6 @@
+## This test parses types in a DWP file through the TU index and checks that the
+## DWARF4 type is retrieved successfully.
+
 # RUN: llvm-mc %s --split-dwarf-file=test.dwo -filetype obj -triple x86_64 -o test.o
 # RUN: llvm-dwp -e test.o -o test.dwp
 # RUN: llvm-dwarfdump test.dwp | FileCheck %s



More information about the llvm-commits mailing list