[Lldb-commits] [lldb] [lldb] Tolerate multiple compile units with the same DWO ID (PR #100577)
Pavel Labath via lldb-commits
lldb-commits at lists.llvm.org
Thu Jul 25 07:27:08 PDT 2024
https://github.com/labath created https://github.com/llvm/llvm-project/pull/100577
I ran into this when LTO completely emptied two compile units, so they ended up with the same hash (see #100375). Although, ideally, the compiler would try to ensure we don't end up with a hash collision even in this case, guaranteeing their absence is practically impossible. This patch ensures this situation does not bring down lldb.
>From 357aecc085298fbad5a3be84f3508fa4314dadd0 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Thu, 25 Jul 2024 16:17:51 +0200
Subject: [PATCH] [lldb] Tolerate multiple compile units with the same DWO ID
I ran into this when LTO completely emptied two compile units, so they
ended up with the same hash (see #100375). Although, ideally, the
compiler would try to ensure we don't end up with a hash collision even
in this case, guaranteeing their absence is practically impossible. This
patch ensures this situation does not bring down lldb.
---
.../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 26 ++--
.../Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +-
.../SymbolFile/DWARF/x86/dwp-hash-collision.s | 142 ++++++++++++++++++
3 files changed, 156 insertions(+), 14 deletions(-)
create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 66a762bf9b685..0a52159d055bb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
*m_dwo_id, m_first_die.GetOffset()));
return; // Can't fetch the compile unit from the dwo file.
}
- // If the skeleton compile unit gets its unit DIE parsed first, then this
- // will fill in the DWO file's back pointer to this skeleton compile unit.
- // If the DWO files get parsed on their own first the skeleton back link
- // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will
- // do a reverse lookup and cache the result.
- dwo_cu->SetSkeletonUnit(this);
+
+ // Link the DWO unit to this object, if it hasn't been linked already (this
+ // can happen when we have an index, and the DWO unit is parsed first).
+ if (!dwo_cu->LinkToSkeletonUnit(*this)) {
+ SetDwoError(Status::createWithFormat(
+ "multiple compile units with Dwo ID {0:x16}", *m_dwo_id));
+ return;
+ }
DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
if (!dwo_cu_die.IsValid()) {
@@ -718,13 +720,11 @@ DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit);
}
-void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) {
- // If someone is re-setting the skeleton compile unit backlink, make sure
- // it is setting it to a valid value when it wasn't valid, or if the
- // value in m_skeleton_unit was valid, it should be the same value.
- assert(skeleton_unit);
- assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit);
- m_skeleton_unit = skeleton_unit;
+bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
+ if (m_skeleton_unit && m_skeleton_unit != &skeleton_unit)
+ return false;
+ m_skeleton_unit = &skeleton_unit;
+ return true;
}
bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 85c37971ced8e..209104fe3a054 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -170,7 +170,7 @@ class DWARFUnit : public UserID {
/// both cases correctly and avoids crashes.
DWARFCompileUnit *GetSkeletonUnit();
- void SetSkeletonUnit(DWARFUnit *skeleton_unit);
+ bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit);
bool Supports_DW_AT_APPLE_objc_complete_type();
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
new file mode 100644
index 0000000000000..d626b4602ad58
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
@@ -0,0 +1,142 @@
+## Test that lldb handles (mainly, that it doesn't crash) the situation where
+## two skeleton compile units have the same DWO ID (and try to claim the same
+## split unit from the DWP file. This can sometimes happen when the compile unit
+## is nearly empty (e.g. because LTO has optimized all of it away).
+
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp
+# RUN: %lldb %t -o "image lookup -t my_enum_type" \
+# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s
+
+## Check that we're able to access the type within the split unit (no matter
+## which skeleton unit it ends up associated with). Completely ignoring the unit
+## might also be reasonable.
+# CHECK: image lookup -t my_enum_type
+# CHECK: 1 match found
+# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type {
+# CHECK-NEXT: }"
+#
+## Check that we get some indication of the error.
+# CHECK: image dump separate-debug-info
+# CHECK: Dwo ID Err Dwo Path
+# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d
+
+.set DWO_ID, 0xdeadbeefbaadf00d
+
+## The main file.
+.ifdef MAIN
+ .section .debug_abbrev,"", at progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 0x76 # DW_AT_dwo_name
+ .byte 8 # DW_FORM_string
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+
+
+ .section .debug_info,"", at progbits
+.irpc I,01
+.Lcu_begin\I:
+ .long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit
+.Ldebug_info_start\I:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad DWO_ID # DWO id
+ .byte 1 # Abbrev [1] DW_TAG_compile_unit
+ .ascii "foo"
+ .byte '0' + \I
+ .asciz ".dwo\0" # DW_AT_dwo_name
+.Ldebug_info_end\I:
+.endr
+
+.else
+## DWP file starts here.
+
+ .section .debug_abbrev.dwo,"e", at progbits
+.LAbbrevBegin:
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 8 # DW_FORM_string
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 4 # DW_TAG_enumeration_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 8 # DW_FORM_string
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 8 # DW_FORM_string
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+.LAbbrevEnd:
+ .section .debug_info.dwo,"e", at progbits
+.LCUBegin:
+.Lcu_begin1:
+ .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+ .short 5 # DWARF version number
+ .byte 5 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long 0 # Offset Into Abbrev. Section
+ .quad DWO_ID # DWO id
+ .byte 1 # Abbrev [1] DW_TAG_compile_unit
+ .asciz "Hand-written DWARF" # DW_AT_producer
+ .short 12 # DW_AT_language
+ .byte 2 # Abbrev [2] DW_TAG_enumeration_type
+ .asciz "my_enum_type" # DW_AT_name
+ .long .Lint-.Lcu_begin1 # DW_AT_type
+ .byte 4 # DW_AT_byte_size
+.Lint:
+ .byte 4 # Abbrev [4] DW_TAG_base_type
+ .asciz "int" # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end1:
+.LCUEnd:
+ .section .debug_cu_index, "", @progbits
+## Header:
+ .short 5 # Version
+ .short 0 # Padding
+ .long 2 # Section count
+ .long 1 # Unit count
+ .long 2 # Slot count
+## Hash Table of Signatures:
+ .quad 0
+ .quad DWO_ID
+## Parallel Table of Indexes:
+ .long 0
+ .long 1
+## Table of Section Offsets:
+## Row 0:
+ .long 1 # DW_SECT_INFO
+ .long 3 # DW_SECT_ABBREV
+## Row 1:
+ .long 0 # Offset in .debug_info.dwo
+ .long 0 # Offset in .debug_abbrev.dwo
+## Table of Section Sizes:
+ .long .LCUEnd-.LCUBegin # Size in .debug_info.dwo
+ .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo
+.endif
More information about the lldb-commits
mailing list