[llvm] [LLVM][DWARF] Change .debug_names abbrev to be an index (PR #81200)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 8 14:31:03 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-debuginfo

Author: Alexander Yermolovich (ayermolo)

<details>
<summary>Changes</summary>

Based on the discussion in https://github.com/llvm/llvm-project/pull/80229,
this changes the implementation to align with how .debug_abbrev is handled, so
that the .debug_names abbrev tag is a monotonically increasing index. This
allows tools like LLDB to access it in constant time.


---
Full diff: https://github.com/llvm/llvm-project/pull/81200.diff


4 Files Affected:

- (modified) llvm/include/llvm/CodeGen/AccelTable.h (+43) 
- (modified) llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp (+46-58) 
- (modified) llvm/test/DebugInfo/X86/debug-names-dwarf64.ll (+5-5) 
- (modified) llvm/test/DebugInfo/X86/debug-names-types.ll (+21-21) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index e6a661696354b..38da67636bae8 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -360,6 +360,49 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
     unsigned Index;
     DWARF5AccelTableData::AttributeEncoding Encoding;
   };
+  union AbbrevDescriptor {
+    struct {
+      uint32_t CompUnit : 1;
+      uint32_t TypeUnit : 1;
+      uint32_t DieOffset : 1;
+      uint32_t Parent : 2;
+      uint32_t TypeHash : 1;
+      uint32_t Tag : 26;
+    } Bits;
+    uint32_t Value = 0;
+  };
+  struct TagIndex {
+    uint32_t DieTag;
+    uint32_t Index;
+  };
+  struct cmpByTagIndex {
+    bool operator()(const TagIndex &LHS, const TagIndex &RHS) const {
+      return LHS.Index < RHS.Index;
+    }
+  };
+  enum IdxParentEncoding : uint8_t {
+    NoIndexedParent =
+        0,        /// Parent information present but parent isn't indexed.
+    Ref4 = 1,     /// Parent information present and parent is indexed.
+    NoParent = 2, /// Parent information missing.
+  };
+
+  /// Returns DW_IDX_parent abbrev encoding for the given form.
+  static uint8_t
+  encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
+    if (!MaybeParentForm)
+      return NoParent;
+    switch (*MaybeParentForm) {
+    case dwarf::Form::DW_FORM_flag_present:
+      return NoIndexedParent;
+    case dwarf::Form::DW_FORM_ref4:
+      return Ref4;
+    default:
+      // This is not crashing on bad input: we should only reach this if the
+      // internal compiler logic is faulty; see getFormForIdxParent.
+      llvm_unreachable("Bad form for IDX_parent");
+    }
+  }
   /// Returns type units that were constructed.
   const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
   /// Add a type unit start symbol.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 1024aabf2ab0f..a30a1393ba58f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -208,7 +208,9 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   };
 
   Header Header;
-  DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>>
+  std::map<DWARF5AccelTable::TagIndex,
+           SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>,
+           DWARF5AccelTable::cmpByTagIndex>
       Abbreviations;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
@@ -223,6 +225,15 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   bool IsSplitDwarf = false;
   /// Stores the DIE offsets which are indexed by this table.
   DenseSet<OffsetAndUnitID> IndexedOffsets;
+  /// Mapping between AbbrevTag and Index.
+  std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
+
+  /// Constructs and returns a unique AbbrevTag that captures what a DIE
+  /// accesses.
+  DWARF5AccelTable::TagIndex getAbbrevIndex(
+      const unsigned DieTag,
+      const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
+      const std::optional<dwarf::Form> &MaybeParentForm);
 
   void populateAbbrevsMap();
 
@@ -234,7 +245,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   void emitEntry(
       const DWARF5AccelTableData &Entry,
       const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
-      DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const;
+      DenseSet<MCSymbol *> &EmittedAccelEntrySymbols);
   void emitData();
 
 public:
@@ -409,49 +420,30 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
   return {};
 }
 
-enum IdxParentEncoding : uint8_t {
-  NoIndexedParent = 0, /// Parent information present but parent isn't indexed.
-  Ref4 = 1,            /// Parent information present and parent is indexed.
-  NoParent = 2,        /// Parent information missing.
-};
-
-static uint32_t constexpr NumBitsIdxParent = 2;
-
-uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
-  if (!MaybeParentForm)
-    return NoParent;
-  switch (*MaybeParentForm) {
-  case dwarf::Form::DW_FORM_flag_present:
-    return NoIndexedParent;
-  case dwarf::Form::DW_FORM_ref4:
-    return Ref4;
-  default:
-    // This is not crashing on bad input: we should only reach this if the
-    // internal compiler logic is faulty; see getFormForIdxParent.
-    llvm_unreachable("Bad form for IDX_parent");
-  }
-}
-
-static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash;
-static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
-  return AbbrvTag >> TagBitOffset;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
-    const unsigned Tag,
+DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
+    const unsigned DieTag,
     const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
-    std::optional<dwarf::Form> MaybeParentForm) {
-  uint32_t AbbrvTag = 0;
-  if (EntryRet)
-    AbbrvTag |= 1 << EntryRet->Encoding.Index;
-  AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
-  AbbrvTag |= 1 << dwarf::DW_IDX_parent;
-  AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset;
-  AbbrvTag |= Tag << TagBitOffset;
-  return AbbrvTag;
+    const std::optional<dwarf::Form> &MaybeParentForm) {
+  DWARF5AccelTable::AbbrevDescriptor AbbrvDesc;
+  if (EntryRet) {
+    switch (EntryRet->Encoding.Index) {
+    case dwarf::DW_IDX_compile_unit:
+      AbbrvDesc.Bits.CompUnit = true;
+      break;
+    case dwarf::DW_IDX_type_unit:
+      AbbrvDesc.Bits.TypeUnit = true;
+      break;
+    default:
+      llvm_unreachable("Invalid encoding index");
+      break;
+    }
+  }
+  AbbrvDesc.Bits.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
+  AbbrvDesc.Bits.DieOffset = true;
+  AbbrvDesc.Bits.Tag = DieTag;
+  auto Iter = AbbrevTagToIndexMap.insert(
+      {AbbrvDesc.Value, static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
+  return {DieTag, Iter.first->second};
 }
 
 static std::optional<dwarf::Form>
@@ -476,8 +468,8 @@ void Dwarf5AccelTableWriter::populateAbbrevsMap() {
         unsigned Tag = Value->getDieTag();
         std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
             IndexedOffsets, Value->getParentDieOffsetAndUnitID());
-        uint32_t AbbrvTag =
-            constructAbbreviationTag(Tag, EntryRet, MaybeParentForm);
+        const DWARF5AccelTable::TagIndex AbbrvTag =
+            getAbbrevIndex(Tag, EntryRet, MaybeParentForm);
         if (Abbreviations.count(AbbrvTag) == 0) {
           SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
           if (EntryRet)
@@ -538,11 +530,9 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevStart);
   for (const auto &Abbrev : Abbreviations) {
     Asm->OutStreamer->AddComment("Abbrev code");
-    uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
-    assert(Tag != 0);
-    Asm->emitULEB128(Abbrev.first);
-    Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
-    Asm->emitULEB128(Tag);
+    Asm->emitULEB128(Abbrev.first.Index);
+    Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first.DieTag));
+    Asm->emitULEB128(Abbrev.first.DieTag);
     for (const auto &AttrEnc : Abbrev.second) {
       Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
       Asm->emitULEB128(AttrEnc.Form,
@@ -558,20 +548,18 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
 void Dwarf5AccelTableWriter::emitEntry(
     const DWARF5AccelTableData &Entry,
     const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
-    DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const {
+    DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
   std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
       getIndexForEntry(Entry);
   std::optional<OffsetAndUnitID> MaybeParentOffset =
       Entry.getParentDieOffsetAndUnitID();
   std::optional<dwarf::Form> MaybeParentForm =
       getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
-  uint32_t AbbrvTag =
-      constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm);
-  auto AbbrevIt = Abbreviations.find(AbbrvTag);
+  const DWARF5AccelTable::TagIndex TagIndexVal =
+      getAbbrevIndex(Entry.getDieTag(), EntryRet, MaybeParentForm);
+  auto AbbrevIt = Abbreviations.find(TagIndexVal);
   assert(AbbrevIt != Abbreviations.end() &&
          "Why wasn't this abbrev generated?");
-  assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
-         "Invalid Tag");
 
   auto EntrySymbolIt =
       DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
@@ -584,7 +572,7 @@ void Dwarf5AccelTableWriter::emitEntry(
   if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
     Asm->OutStreamer->emitLabel(EntrySymbol);
 
-  Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+  Asm->emitULEB128(TagIndexVal.Index, "Abbreviation code");
 
   for (const auto &AttrEnc : AbbrevIt->second) {
     Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
diff --git a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
index c15e2ad1d56b0..9a5fd07335873 100644
--- a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
@@ -30,11 +30,6 @@
 ; CHECK-NEXT:     CU[0]: 0x00000000
 ; CHECK-NEXT:   ]
 ; CHECK-NEXT:   Abbreviations [
-; CHECK-NEXT:     Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
-; CHECK-NEXT:       Tag: DW_TAG_label
-; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT:       DW_IDX_parent: DW_FORM_ref4
-; CHECK-NEXT:     }
 ; CHECK-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
 ; CHECK-NEXT:       Tag: DW_TAG_base_type
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
@@ -50,6 +45,11 @@
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-NEXT:     }
+; CHECK-NEXT:     Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
+; CHECK-NEXT:       Tag: DW_TAG_label
+; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT:       DW_IDX_parent: DW_FORM_ref4
+; CHECK-NEXT:     }
 ; CHECK-NEXT:   ]
 ; CHECK-NEXT:   Bucket 0 [
 ; CHECK-NEXT:     Name 1 {
diff --git a/llvm/test/DebugInfo/X86/debug-names-types.ll b/llvm/test/DebugInfo/X86/debug-names-types.ll
index f41bb5524b9c3..ff0d4d52c1f07 100644
--- a/llvm/test/DebugInfo/X86/debug-names-types.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-types.ll
@@ -37,20 +37,14 @@
 ; CHECK-NEXT:        LocalTU[0]: 0x00000000
 ; CHECK-NEXT:      ]
 ; CHECK:        Abbreviations [
-; CHECK-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
-; CHECK-NEXT:       Tag: DW_TAG_structure_type
-; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
-; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
-; CHECK-NEXT:     }
-; CHECK-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
 ; CHECK-NEXT:       Tag: DW_TAG_base_type
-; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-NEXT:     }
-; CHECK-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
-; CHECK-NEXT:       Tag: DW_TAG_base_type
+; CHECK-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
+; CHECK-NEXT:       Tag: DW_TAG_structure_type
+; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-NEXT:     }
@@ -64,6 +58,12 @@
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-NEXT:     }
+; CHECK-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT:       Tag: DW_TAG_base_type
+; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
+; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; CHECK-NEXT:     }
 ; CHECK-NEXT:   ]
 ; CHECK-NEXT:   Bucket 0 [
 ; CHECK-NEXT:     Name 1 {
@@ -130,7 +130,7 @@
 ; CHECK-SPLIT:          Foreign TU count: 1
 ; CHECK-SPLIT-NEXT:     Bucket count: 4
 ; CHECK-SPLIT-NEXT:     Name count: 4
-; CHECK-SPLIT-NEXT:     Abbreviations table size: 0x32
+; CHECK-SPLIT-NEXT:     Abbreviations table size: 0x2D
 ; CHECK-SPLIT-NEXT:     Augmentation: 'LLVM0700'
 ; CHECK-SPLIT-NEXT:   }
 ; CHECK-SPLIT-NEXT:   Compilation Unit offsets [
@@ -140,20 +140,14 @@
 ; CHECK-SPLIT-NEXT:     ForeignTU[0]: 0x675d23e4f33235f2
 ; CHECK-SPLIT-NEXT:   ]
 ; CHECK-SPLIT-NEXT:   Abbreviations [
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT:       Tag: DW_TAG_structure_type
-; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
-; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-SPLIT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
-; CHECK-SPLIT-NEXT:     }
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
-; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-SPLIT-NEXT:     }
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:       Tag: DW_TAG_structure_type
+; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-SPLIT-NEXT:     }
@@ -167,6 +161,12 @@
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
 ; CHECK-SPLIT-NEXT:     }
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
+; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-SPLIT-NEXT:       DW_IDX_parent: DW_FORM_flag_present
+; CHECK-SPLIT-NEXT:     }
 ; CHECK-SPLIT-NEXT:   ]
 ; CHECK-SPLIT-NEXT:   Bucket 0 [
 ; CHECK-SPLIT-NEXT:     Name 1 {

``````````

</details>


https://github.com/llvm/llvm-project/pull/81200


More information about the llvm-commits mailing list