[Lldb-commits] [lldb] [lldb] Tolerate multiple compile units with the same DWO ID (PR #100577)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Thu Jul 25 07:27:08 PDT 2024


https://github.com/labath created https://github.com/llvm/llvm-project/pull/100577

I ran into this when LTO completely emptied two compile units, so they ended up with the same hash (see #100375). Although, ideally, the compiler would try to ensure we don't end up with a hash collision even in this case, guaranteeing their absence is practically impossible. This patch ensures this situation does not bring down lldb.

>From 357aecc085298fbad5a3be84f3508fa4314dadd0 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Thu, 25 Jul 2024 16:17:51 +0200
Subject: [PATCH] [lldb] Tolerate multiple compile units with the same DWO ID

I ran into this when LTO completely emptied two compile units, so they
ended up with the same hash (see #100375). Although, ideally, the
compiler would try to ensure we don't end up with a hash collision even
in this case, guaranteeing their absence is practically impossible. This
patch ensures this situation does not bring down lldb.
---
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  26 ++--
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |   2 +-
 .../SymbolFile/DWARF/x86/dwp-hash-collision.s | 142 ++++++++++++++++++
 3 files changed, 156 insertions(+), 14 deletions(-)
 create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 66a762bf9b685..0a52159d055bb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -97,12 +97,14 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
         *m_dwo_id, m_first_die.GetOffset()));
     return; // Can't fetch the compile unit from the dwo file.
   }
-  // If the skeleton compile unit gets its unit DIE parsed first, then this
-  // will fill in the DWO file's back pointer to this skeleton compile unit.
-  // If the DWO files get parsed on their own first the skeleton back link
-  // can be done manually in DWARFUnit::GetSkeletonCompileUnit() which will
-  // do a reverse lookup and cache the result.
-  dwo_cu->SetSkeletonUnit(this);
+
+  // Link the DWO unit to this object, if it hasn't been linked already (this
+  // can happen when we have an index, and the DWO unit is parsed first).
+  if (!dwo_cu->LinkToSkeletonUnit(*this)) {
+    SetDwoError(Status::createWithFormat(
+        "multiple compile units with Dwo ID {0:x16}", *m_dwo_id));
+    return;
+  }
 
   DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
   if (!dwo_cu_die.IsValid()) {
@@ -718,13 +720,11 @@ DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
   return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit);
 }
 
-void DWARFUnit::SetSkeletonUnit(DWARFUnit *skeleton_unit) {
-  // If someone is re-setting the skeleton compile unit backlink, make sure
-  // it is setting it to a valid value when it wasn't valid, or if the
-  // value in m_skeleton_unit was valid, it should be the same value.
-  assert(skeleton_unit);
-  assert(m_skeleton_unit == nullptr || m_skeleton_unit == skeleton_unit);
-  m_skeleton_unit = skeleton_unit;
+bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
+  if (m_skeleton_unit && m_skeleton_unit != &skeleton_unit)
+    return false;
+  m_skeleton_unit = &skeleton_unit;
+  return true;
 }
 
 bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 85c37971ced8e..209104fe3a054 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -170,7 +170,7 @@ class DWARFUnit : public UserID {
   /// both cases correctly and avoids crashes.
   DWARFCompileUnit *GetSkeletonUnit();
 
-  void SetSkeletonUnit(DWARFUnit *skeleton_unit);
+  bool LinkToSkeletonUnit(DWARFUnit &skeleton_unit);
 
   bool Supports_DW_AT_APPLE_objc_complete_type();
 
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
new file mode 100644
index 0000000000000..d626b4602ad58
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-hash-collision.s
@@ -0,0 +1,142 @@
+## Test that lldb handles (mainly, that it doesn't crash) the situation where
+## two skeleton compile units have the same DWO ID (and try to claim the same
+## split unit from the DWP file. This can sometimes happen when the compile unit
+## is nearly empty (e.g. because LTO has optimized all of it away).
+
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s --defsym MAIN=0 > %t
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t.dwp
+# RUN: %lldb %t -o "image lookup -t my_enum_type" \
+# RUN:   -o "image dump separate-debug-info" -o exit | FileCheck %s
+
+## Check that we're able to access the type within the split unit (no matter
+## which skeleton unit it ends up associated with). Completely ignoring the unit
+## might also be reasonable.
+# CHECK: image lookup -t my_enum_type
+# CHECK: 1 match found
+# CHECK:      name = "my_enum_type", byte-size = 4, compiler_type = "enum my_enum_type {
+# CHECK-NEXT: }"
+#
+## Check that we get some indication of the error.
+# CHECK: image dump separate-debug-info
+# CHECK:      Dwo ID             Err Dwo Path
+# CHECK: 0xdeadbeefbaadf00d E   multiple compile units with Dwo ID 0xdeadbeefbaadf00d
+
+.set DWO_ID, 0xdeadbeefbaadf00d
+
+## The main file.
+.ifdef MAIN
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                       # Abbreviation Code
+        .byte   74                      # DW_TAG_compile_unit
+        .byte   0                       # DW_CHILDREN_no
+        .byte   0x76                    # DW_AT_dwo_name
+        .byte   8                       # DW_FORM_string
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+
+        .section        .debug_info,"", at progbits
+.irpc I,01
+.Lcu_begin\I:
+        .long   .Ldebug_info_end\I-.Ldebug_info_start\I # Length of Unit
+.Ldebug_info_start\I:
+        .short  5                       # DWARF version number
+        .byte   4                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .quad   DWO_ID                  # DWO id
+        .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+        .ascii  "foo"
+        .byte   '0' + \I
+        .asciz  ".dwo\0"                # DW_AT_dwo_name
+.Ldebug_info_end\I:
+.endr
+
+.else
+## DWP file starts here.
+
+        .section        .debug_abbrev.dwo,"e", at progbits
+.LAbbrevBegin:
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   8                       # DW_FORM_string
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   2                       # Abbreviation Code
+        .byte   4                       # DW_TAG_enumeration_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   11                      # DW_AT_byte_size
+        .byte   11                      # DW_FORM_data1
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   4                       # Abbreviation Code
+        .byte   36                      # DW_TAG_base_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   8                       # DW_FORM_string
+        .byte   62                      # DW_AT_encoding
+        .byte   11                      # DW_FORM_data1
+        .byte   11                      # DW_AT_byte_size
+        .byte   11                      # DW_FORM_data1
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+.LAbbrevEnd:
+        .section        .debug_info.dwo,"e", at progbits
+.LCUBegin:
+.Lcu_begin1:
+        .long   .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+        .short  5                       # DWARF version number
+        .byte   5                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   0                       # Offset Into Abbrev. Section
+        .quad   DWO_ID                  # DWO id
+        .byte   1                       # Abbrev [1] DW_TAG_compile_unit
+        .asciz  "Hand-written DWARF"    # DW_AT_producer
+        .short  12                      # DW_AT_language
+        .byte   2                       # Abbrev [2] DW_TAG_enumeration_type
+        .asciz  "my_enum_type"          # DW_AT_name
+        .long   .Lint-.Lcu_begin1       # DW_AT_type
+        .byte   4                       # DW_AT_byte_size
+.Lint:
+        .byte   4                       # Abbrev [4] DW_TAG_base_type
+        .asciz  "int"                   # DW_AT_name
+        .byte   5                       # DW_AT_encoding
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end1:
+.LCUEnd:
+        .section .debug_cu_index, "", @progbits
+## Header:
+        .short 5                        # Version
+        .short 0                        # Padding
+        .long 2                         # Section count
+        .long 1                         # Unit count
+        .long 2                         # Slot count
+## Hash Table of Signatures:
+        .quad 0
+        .quad DWO_ID
+## Parallel Table of Indexes:
+        .long 0
+        .long 1
+## Table of Section Offsets:
+## Row 0:
+        .long 1                         # DW_SECT_INFO
+        .long 3                         # DW_SECT_ABBREV
+## Row 1:
+        .long 0                         # Offset in .debug_info.dwo
+        .long 0                         # Offset in .debug_abbrev.dwo
+## Table of Section Sizes:
+        .long .LCUEnd-.LCUBegin         # Size in .debug_info.dwo
+        .long .LAbbrevEnd-.LAbbrevBegin # Size in .debug_abbrev.dwo
+.endif



More information about the lldb-commits mailing list