[llvm] 22afe19 - DebugInfo: Rebuild dwp debug_info index column from v5 indexes more robustly

David Blaikie via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 28 13:47:28 PDT 2023


Author: David Blaikie
Date: 2023-03-28T20:47:20Z
New Revision: 22afe19ac03f5b5db642cbb8ba7022c2ffc09710

URL: https://github.com/llvm/llvm-project/commit/22afe19ac03f5b5db642cbb8ba7022c2ffc09710
DIFF: https://github.com/llvm/llvm-project/commit/22afe19ac03f5b5db642cbb8ba7022c2ffc09710.diff

LOG: DebugInfo: Rebuild dwp debug_info index column from v5 indexes more robustly

The v4 rebuilding is best-effort because it's not possible to reliably
parse the DWO ID, as that requires the abbrev section (and if the index isn't
trustworthy then there's no way to find the associated abbrev section
contribution for a given info section contribution).

But in v5 the DWO ID/type signature is in the header and can be rebuilt
losslessly (only at the cost of performance of rescanning/parsing the
headers of all the units), so let's implement that.

the testing isn't /ideal/ - I think the testing should've been
implemented as a hardcoded dwp file with a corrupted/incorrect index,
then the test could've demonstrated that reparsing the index produces
the right answer - but this is a quick port of the existing v5 test back
to v4 so that we don't lose coverage on the v4 codepath now that it's
separated from the v5 codepath.

Differential Revision: https://reviews.llvm.org/D146662

Added: 
    llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s

Modified: 
    llvm/lib/DebugInfo/DWARF/DWARFContext.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index f648ef8ff7707..eb57bdde064f8 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
@@ -779,7 +780,7 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
   return Success;
 }
 
-void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
+void fixupIndexV4(const DWARFObject &DObj, DWARFContext &C,
                 DWARFUnitIndex &Index) {
   using EntryType = DWARFUnitIndex::Entry::SectionContribution;
   using EntryMap = DenseMap<uint32_t, EntryType>;
@@ -847,14 +848,62 @@ void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
   return;
 }
 
+void fixupIndexV5(const DWARFObject &DObj, DWARFContext &C,
+                  DWARFUnitIndex &Index) {
+  DenseMap<uint64_t, uint64_t> Map;
+
+  DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+    if (!(C.getParseCUTUIndexManually() ||
+          S.Data.size() >= std::numeric_limits<uint32_t>::max()))
+      return;
+    DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
+    uint64_t Offset = 0;
+    while (Data.isValidOffset(Offset)) {
+      DWARFUnitHeader Header;
+      if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
+        logAllUnhandledErrors(
+            createError("Failed to parse unit header in DWP file"), errs());
+        break;
+      }
+      bool CU = Header.getUnitType() == DW_UT_split_compile;
+      uint64_t Sig = CU ? *Header.getDWOId() : Header.getTypeHash();
+      Map[Sig] = Header.getOffset();
+      Offset = Header.getNextUnitOffset();
+    }
+  });
+  for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
+    if (!E.isValid())
+      continue;
+    DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
+    auto Iter = Map.find(E.getSignature());
+    if (Iter == Map.end()) {
+      logAllUnhandledErrors(
+          createError("Could not find unit with signature 0x" +
+                      Twine::utohexstr(E.getSignature()) + " in the Map"),
+          errs());
+      break;
+    }
+    CUOff.setOffset(Iter->second);
+  }
+}
+
+void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
+                DWARFUnitIndex &Index) {
+  if (Index.getVersion() < 5)
+    fixupIndexV4(DObj, C, Index);
+  else
+    fixupIndexV5(DObj, C, Index);
+}
+
 const DWARFUnitIndex &DWARFContext::getCUIndex() {
   if (CUIndex)
     return *CUIndex;
 
   DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);
   CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
-  CUIndex->parse(CUIndexData);
-  fixupIndex(*DObj, *this, *CUIndex.get());
+  bool IsParseSuccessful = CUIndex->parse(CUIndexData);
+  if (IsParseSuccessful)
+    fixupIndex(*DObj, *this, *CUIndex);
   return *CUIndex;
 }
 
@@ -868,7 +917,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() {
   // If we are parsing TU-index and for .debug_types section we don't need
   // to do anything.
   if (isParseSuccessful && TUIndex->getVersion() != 2)
-    fixupIndex(*DObj, *this, *TUIndex.get());
+    fixupIndex(*DObj, *this, *TUIndex);
   return *TUIndex;
 }
 

diff  --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s
new file mode 100644
index 0000000000000..333956db761d7
--- /dev/null
+++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v4.s
@@ -0,0 +1,79 @@
+# This test checks if we can correctly parse manually generated CU and TU indexes for DWARF4.
+
+# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o \
+# RUN:         -split-dwarf-file=%t.dwo -dwarf-version=4
+# RUN: llvm-dwp %t.dwo -o %t.dwp
+# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index %t.dwp | FileCheck %s
+# RUN: llvm-dwarfdump -debug-info -debug-types -debug-cu-index -debug-tu-index -manaully-generate-unit-index %t.dwp | FileCheck %s
+
+## Note: In order to check whether the type unit index is generated
+## there is no need to add the missing DIEs for the structure type of the type unit.
+
+# CHECK-DAG: .debug_info.dwo contents:
+# CHECK: 0x00000000: Compile Unit: length = 0x00000010, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x00000014)
+# CHECK:  DW_AT_GNU_dwo_id  ([[CUID1:.*]])
+# CHECK-DAG: .debug_types.dwo contents:
+# CHECK: 0x00000000: Type Unit: length = 0x00000016, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID1:.*]], type_offset = 0x0019 (next unit at 0x0000001a)
+# CHECK: 0x0000001a: Type Unit: length = 0x00000016, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08, name = '', type_signature = [[TUID2:.*]], type_offset = 0x0019 (next unit at 0x00000034)
+# CHECK-DAG: .debug_cu_index contents:
+# CHECK: version = 2, units = 1, slots = 2
+# CHECK: Index Signature          INFO                                     ABBREV
+# CHECK:     2 [[CUID1]]          [0x0000000000000000, 0x0000000000000014) [0x00000000, 0x00000013)
+# CHECK-DAG: .debug_tu_index contents:
+# CHECK: version = 2, units = 2, slots = 4
+# CHECK: Index Signature          TYPES                                    ABBREV
+# CHECK:     1 [[TUID1]]          [0x0000000000000000, 0x000000000000001a) [0x00000000, 0x00000013)
+# CHECK:     4 [[TUID2]]          [0x000000000000001a, 0x0000000000000034) [0x00000000, 0x00000013)
+
+    .section	.debug_types.dwo,"e",@progbits
+    .long	.Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit
+.Ldebug_info_dwo_start0:
+    .short	4                               # DWARF version number
+    .long	0                               # Offset Into Abbrev. Section
+    .byte	8                               # Address Size (in bytes)
+    .quad	5657452045627120676             # Type Signature
+    .long	25                              # Type DIE Offset
+    .byte	2                               # Abbrev [2] DW_TAG_type_unit
+    .byte	3                               # Abbrev [3] DW_TAG_structure_type
+    .byte	0                               # End Of Children Mark
+.Ldebug_info_dwo_end0:
+    .section	.debug_types.dwo,"e",@progbits
+    .long	.Ldebug_info_dwo_end1-.Ldebug_info_dwo_start1 # Length of Unit
+.Ldebug_info_dwo_start1:
+    .short	4                               # DWARF version number
+    .long	0                               # Offset Into Abbrev. Section
+    .byte	8                               # Address Size (in bytes)
+    .quad	-8528522068957683993            # Type Signature
+    .long	25                              # Type DIE Offset
+    .byte	2                               # Abbrev [2] DW_TAG_type_unit
+    .byte	3                               # Abbrev [3] DW_TAG_structure_type
+    .byte	0                               # End Of Children Mark
+.Ldebug_info_dwo_end1:
+    .section	.debug_info.dwo,"e",@progbits
+    .long	.Ldebug_info_dwo_end2-.Ldebug_info_dwo_start2 # Length of Unit
+.Ldebug_info_dwo_start2:
+    .short	4                               # DWARF version number
+    .long	0                               # Offset Into Abbrev. Section
+    .byte	8                               # Address Size (in bytes)
+    .byte	1                               # Abbrev [1] DW_TAG_compile_unit
+    .quad	-6619898858003450627            # DW_AT_GNU_dwo_id
+.Ldebug_info_dwo_end2:
+    .section	.debug_abbrev.dwo,"e",@progbits
+    .byte	1                               # Abbreviation Code
+    .byte	17                              # DW_TAG_compile_unit
+    .byte	0                               # DW_CHILDREN_no
+    .ascii	"\261B"                         # DW_AT_GNU_dwo_id
+    .byte	7                               # DW_FORM_data8
+    .byte	0                               # EOM(1)
+    .byte	0                               # EOM(2)
+    .byte	2                               # Abbreviation Code
+    .byte	65                              # DW_TAG_type_unit
+    .byte	1                               # DW_CHILDREN_yes
+    .byte	0                               # EOM
+    .byte	0                               # EOM
+    .byte	3                               # Abbreviation Code
+    .byte	0x13                            # DW_TAG_structure_type
+    .byte	0                               # DW_CHILDREN_no
+    .byte	0                               # EOM
+    .byte	0                               # EOM
+    .byte	0                               # EOM


        


More information about the llvm-commits mailing list