[Lldb-commits] [lldb] Reapply "[lldb] Tolerate multiple compile units with the same DWO ID (#100577)" (PR #104041)
Pavel Labath via lldb-commits
lldb-commits at lists.llvm.org
Wed Aug 14 08:36:30 PDT 2024
https://github.com/labath created https://github.com/llvm/llvm-project/pull/104041
The only change vs. the first version of the patch is that I've made
DWARFUnit linking thread-safe/unit. This was necessary because, during the
indexing step, two skeleton units could attempt to associate themselves
with the split unit.
The original commit message was:
I ran into this when LTO completely emptied two compile units, so they
ended up with the same hash (see #100375). Although, ideally, the
compiler would try to ensure we don't end up with a hash collision even
in this case, guaranteeing their absence is practically impossible. This
patch ensures this situation does not bring down lldb.
>From ba693941ec0bc5f829bf57575bcc26a8664fdcc4 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Tue, 13 Aug 2024 07:08:17 +0000
Subject: [PATCH 1/2] Reapply "[lldb] Tolerate multiple compile units with the
same DWO ID (#100577)"
The only change vs. the first version of the patch is that I've made
DWARFUnit linking thread-safe/unit. This was necessary because, during the
indexing step, two skeleton units could attempt to associate themselves
with the split unit.
The original commit message was:
I ran into this when LTO completely emptied two compile units, so they
ended up with the same hash (see #100375). Although, ideally, the
compiler would try to ensure we don't end up with a hash collision even
in this case, guaranteeing their absence is practically impossible. This
patch ensures this situation does not bring down lldb.
---
.../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 26 ++--
.../Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +-
.../SymbolFile/DWARF/x86/dwp-hash-collision.s | 142 ++++++++++++++++++
3 files changed, 156 insertions(+), 14 deletions(-)
create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 66a762bf9b6854..0a52159d055bb4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
*m_dwo_id, m_first_die.GetOffset()));
return; // Can't fetch the compile unit from the dwo file.
}
- // If the skeleton compile unit gets its unit DIE parsed first, then this
- // will fill in the DWO file's back pointer to this skeleton compile unit.
- // If the DWO files get parsed on their own first the skeleton back link
- // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will
- // do a reverse lookup and cache the result.
- dwo_cu->SetSkeletonUnit(this);
+
+ // Link the DWO unit to this object, if it hasn't been linked already (this
+ // can happen when we have an index, and the DWO unit is parsed first).
+ if (!dwo_cu->LinkToSkeletonUnit(*this)) {
+ SetDwoError(Status::createWithFormat(
+ "multiple compile units with Dwo ID {0:x16}", *m_dwo_id));
+ return;
+ }
DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
if (!dwo_cu_die.IsValid()) {
@@ -718,13 +720,11 @@ DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit);
}
-void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) {
- // If someone is re-setting the skeleton compile unit backlink, make sure
- // it is setting it to a valid value when it wasn't valid, or if the
- // value in m_skeleton_unit was valid, it should be the same value.
- assert(skeleton_unit);
- assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit);
- m_skeleton_unit = skeleton_unit;
+bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
+ if (m_skeleton_unit && m_skeleton_unit != &skeleton_unit)
+ return false;
+ m_skeleton_unit = &skeleton_unit;
+ return true;
}
bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 85c37971ced8e0..209104fe3a054e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -170,7 +170,7 @@ class DWARFUnit : public UserID {
/// both cases correctly and avoids crashes.
DWARFCompileUnit *GetSkeletonUnit();
- void SetSkeletonUnit(DWARFUnit *skeleton_unit);
+ bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit);
bool Supports_DW_AT_APPLE_objc_complete_type();
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
new file mode 100644
index 00000000000000..d626b4602ad58f
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
@@ -0,0 +1,142 @@
+## Test that lldb handles (mainly, that it doesn't crash) the situation where
+## two skeleton compile units have the same DWO ID (and try to claim the same
+## split unit from the DWP file. This can sometimes happen when the compile unit
+## is nearly empty (e.g. because LTO has optimized all of it away).
+
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp
+# RUN: %lldb %t -o "image lookup -t my_enum_type" \
+# RUN: -o "image dump separate-debug-info" -o exit | FileCheck %s
+
+## Check that we're able to access the type within the split unit (no matter
+## which skeleton unit it ends up associated with). Completely ignoring the unit
+## might also be reasonable.
+# CHECK: image lookup -t my_enum_type
+# CHECK: 1 match found
+# CHECK: name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type {
+# CHECK-NEXT: }"
+#
+## Check that we get some indication of the error.
+# CHECK: image dump separate-debug-info
+# CHECK: Dwo ID Err Dwo Path
+# CHECK: 0xdeadbeefbaadf00d E multiple compile units with Dwo ID 0xdeadbeefbaadf00d
+
+.set DWO_ID, 0xdeadbeefbaadf00d
+
+## The main file.
+.ifdef MAIN
+ .section .debug_abbrev,"", at progbits
+ .byte 1 # Abbreviation Code
+ .byte 74 # DW_TAG_compile_unit
+ .byte 0 # DW_CHILDREN_no
+ .byte 0x76 # DW_AT_dwo_name
+ .byte 8 # DW_FORM_string
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+
+
+ .section .debug_info,"", at progbits
+.irpc I,01
+.Lcu_begin\I:
+ .long .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit
+.Ldebug_info_start\I:
+ .short 5 # DWARF version number
+ .byte 4 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .quad DWO_ID # DWO id
+ .byte 1 # Abbrev [1] DW_TAG_compile_unit
+ .ascii "foo"
+ .byte '0' + \I
+ .asciz ".dwo\0" # DW_AT_dwo_name
+.Ldebug_info_end\I:
+.endr
+
+.else
+## DWP file starts here.
+
+ .section .debug_abbrev.dwo,"e", at progbits
+.LAbbrevBegin:
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 8 # DW_FORM_string
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 4 # DW_TAG_enumeration_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 8 # DW_FORM_string
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 8 # DW_FORM_string
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+.LAbbrevEnd:
+ .section .debug_info.dwo,"e", at progbits
+.LCUBegin:
+.Lcu_begin1:
+ .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+ .short 5 # DWARF version number
+ .byte 5 # DWARF Unit Type
+ .byte 8 # Address Size (in bytes)
+ .long 0 # Offset Into Abbrev. Section
+ .quad DWO_ID # DWO id
+ .byte 1 # Abbrev [1] DW_TAG_compile_unit
+ .asciz "Hand-written DWARF" # DW_AT_producer
+ .short 12 # DW_AT_language
+ .byte 2 # Abbrev [2] DW_TAG_enumeration_type
+ .asciz "my_enum_type" # DW_AT_name
+ .long .Lint-.Lcu_begin1 # DW_AT_type
+ .byte 4 # DW_AT_byte_size
+.Lint:
+ .byte 4 # Abbrev [4] DW_TAG_base_type
+ .asciz "int" # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end1:
+.LCUEnd:
+ .section .debug_cu_index, "", @progbits
+## Header:
+ .short 5 # Version
+ .short 0 # Padding
+ .long 2 # Section count
+ .long 1 # Unit count
+ .long 2 # Slot count
+## Hash Table of Signatures:
+ .quad 0
+ .quad DWO_ID
+## Parallel Table of Indexes:
+ .long 0
+ .long 1
+## Table of Section Offsets:
+## Row 0:
+ .long 1 # DW_SECT_INFO
+ .long 3 # DW_SECT_ABBREV
+## Row 1:
+ .long 0 # Offset in .debug_info.dwo
+ .long 0 # Offset in .debug_abbrev.dwo
+## Table of Section Sizes:
+ .long .LCUEnd-.LCUBegin # Size in .debug_info.dwo
+ .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo
+.endif
>From 558d3d648a320b53bdca5d444e033917ff20f751 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 14 Aug 2024 14:49:21 +0000
Subject: [PATCH 2/2] make atomic
---
.../Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 23 ++++++++++++-------
.../Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +-
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 0a52159d055bb4..81f937762e35a6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -710,21 +710,28 @@ uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) {
uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; }
DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
- if (m_skeleton_unit == nullptr && IsDWOUnit()) {
+ if (m_skeleton_unit.load() == nullptr && IsDWOUnit()) {
SymbolFileDWARFDwo *dwo =
llvm::dyn_cast_or_null<SymbolFileDWARFDwo>(&GetSymbolFileDWARF());
// Do a reverse lookup if the skeleton compile unit wasn't set.
- if (dwo)
- m_skeleton_unit = dwo->GetBaseSymbolFile().GetSkeletonUnit(this);
+ DWARFUnit *candidate_skeleton_unit =
+ dwo ? dwo->GetBaseSymbolFile().GetSkeletonUnit(this) : nullptr;
+ if (candidate_skeleton_unit)
+ (void)LinkToSkeletonUnit(*candidate_skeleton_unit);
+ // Linking may fail due to a race, so be sure to return the actual value.
}
- return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit);
+ return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit.load());
}
bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
- if (m_skeleton_unit && m_skeleton_unit != &skeleton_unit)
- return false;
- m_skeleton_unit = &skeleton_unit;
- return true;
+ DWARFUnit *expected_unit = nullptr;
+ if (m_skeleton_unit.compare_exchange_strong(expected_unit, &skeleton_unit))
+ return true;
+ if (expected_unit == &skeleton_unit) {
+ // Exchange failed because it already contained the right value.
+ return true;
+ }
+ return false; // Already linked to a different unit.
}
bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 209104fe3a054e..148932d67b908c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -308,7 +308,7 @@ class DWARFUnit : public UserID {
const llvm::DWARFAbbreviationDeclarationSet *m_abbrevs = nullptr;
lldb_private::CompileUnit *m_lldb_cu = nullptr;
// If this is a DWO file, we have a backlink to our skeleton compile unit.
- DWARFUnit *m_skeleton_unit = nullptr;
+ std::atomic<DWARFUnit *> m_skeleton_unit = nullptr;
// The compile unit debug information entry item
DWARFDebugInfoEntry::collection m_die_array;
mutable llvm::sys::RWMutex m_die_array_mutex;
More information about the lldb-commits
mailing list