[llvm] [DebugNames] Implement per-entry abbreviation support (PR #76731)

Felipe de Azevedo Piovezan via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 2 08:06:15 PST 2024


https://github.com/felipepiovezan created https://github.com/llvm/llvm-project/pull/76731

(ignore the first commit, this depends on https://github.com/llvm/llvm-project/pull/76296)

    Prior to this commit, the choice of forms to use in debug_names abbreviations
    was made independently of entries themselves. For example, if we wanted entries
    to have a "IDX_compile_unit", all entries needed to use the same form (e.g
    data2) for such IDX, even if a specific entry could fit its IDX in a single byte
    (e.g. data1).

    This commit changes that by creating an `AbbreviationContents` data structure to
    handle the encoding of abbreviations on a per-entry basis. We don't need the
    full generality of abbreviations from the debug_info section -- abbreviations in
    debug_names are more limited --, but this commit is desirable for two reasons:

    1. Upcoming patches may want to use different forms for different entries (there
    is ongoing work for IDX_parent that will benefit from this)
    2. This allows for space savings by using smaller forms.

    A few tests had to be updated, as this commit changes the abbreviation number
    (and therefore the order in which abbreviations are printed). Notably, this
    _decreases_ the abbreviation number used, which may lead to smaller ULEB
    encodings. A future patch should just renumber these to start with 1.



>From acdf40c249adbea774bc5880d7dbf7d2bb87efc1 Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan <fpiovezan at apple.com>
Date: Sat, 23 Dec 2023 14:44:53 -0300
Subject: [PATCH 1/2] [AsmPrinter][Dwarf5][nfc] Remove template from AccelTable
 class

This template is no longer used.
---
 llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 62 +++++++++-------------
 1 file changed, 25 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 30ea7eef3a12ba..bf580269eca62e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -182,11 +182,7 @@ class AppleAccelTableWriter : public AccelTableWriter {
 /// Class responsible for emitting a DWARF v5 Accelerator Table. The only
 /// public function is emit(), which performs the actual emission.
 ///
-/// The class is templated in its data type. This allows us to emit both dyamic
-/// and static data entries. A callback abstract the logic to provide a CU
-/// index for a given entry, which is different per data type, but identical
-/// for every entry in the same table.
-template <typename DataT>
+/// A callback abstracts the logic to provide a CU index for a given entry.
 class Dwarf5AccelTableWriter : public AccelTableWriter {
   struct Header {
     uint16_t Version = 5;
@@ -216,7 +212,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
   llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
-      const DataT &)>
+      const DWARF5AccelTableData &)>
       getIndexForEntry;
   MCSymbol *ContributionEnd = nullptr;
   MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
@@ -232,7 +228,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   void emitBuckets() const;
   void emitStringOffsets() const;
   void emitAbbrevs() const;
-  void emitEntry(const DataT &Entry) const;
+  void emitEntry(const DWARF5AccelTableData &Entry) const;
   void emitData() const;
 
 public:
@@ -240,8 +236,8 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
       AsmPrinter *Asm, const AccelTableBase &Contents,
       ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
       ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
-      llvm::function_ref<
-          std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)>
+      llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+          const DWARF5AccelTableData &)>
           getIndexForEntry,
       bool IsSplitDwarf);
 
@@ -370,8 +366,7 @@ DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
                                            const bool IsTU)
     : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {}
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
+void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   assert(CompUnitCount > 0 && "Index must have at least one CU.");
 
   AsmPrinter *Asm = Ctx.Asm;
@@ -417,14 +412,14 @@ static uint32_t constructAbbreviationTag(
   AbbrvTag |= Tag << LowerBitSize;
   return AbbrvTag;
 }
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
+void Dwarf5AccelTableWriter::populateAbbrevsMap() {
   for (auto &Bucket : Contents.getBuckets()) {
     for (auto *Hash : Bucket) {
       for (auto *Value : Hash->Values) {
         std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
-            getIndexForEntry(*static_cast<const DataT *>(Value));
-        unsigned Tag = static_cast<const DataT *>(Value)->getDieTag();
+            getIndexForEntry(*static_cast<const DWARF5AccelTableData *>(Value));
+        unsigned Tag =
+            static_cast<const DWARF5AccelTableData *>(Value)->getDieTag();
         uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
         if (Abbreviations.count(AbbrvTag) == 0) {
           SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
@@ -438,8 +433,7 @@ void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
   }
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
+void Dwarf5AccelTableWriter::emitCUList() const {
   for (const auto &CU : enumerate(CompUnits)) {
     Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index()));
     if (std::holds_alternative<MCSymbol *>(CU.value()))
@@ -449,8 +443,7 @@ void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
   }
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitTUList() const {
+void Dwarf5AccelTableWriter::emitTUList() const {
   for (const auto &TU : enumerate(TypeUnits)) {
     Asm->OutStreamer->AddComment("Type unit " + Twine(TU.index()));
     if (std::holds_alternative<MCSymbol *>(TU.value()))
@@ -462,8 +455,7 @@ void Dwarf5AccelTableWriter<DataT>::emitTUList() const {
   }
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
+void Dwarf5AccelTableWriter::emitBuckets() const {
   uint32_t Index = 1;
   for (const auto &Bucket : enumerate(Contents.getBuckets())) {
     Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index()));
@@ -472,8 +464,7 @@ void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
   }
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
+void Dwarf5AccelTableWriter::emitStringOffsets() const {
   for (const auto &Bucket : enumerate(Contents.getBuckets())) {
     for (auto *Hash : Bucket.value()) {
       DwarfStringPoolEntryRef String = Hash->Name;
@@ -484,8 +475,7 @@ void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
   }
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
+void Dwarf5AccelTableWriter::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevStart);
   for (const auto &Abbrev : Abbreviations) {
     Asm->OutStreamer->AddComment("Abbrev code");
@@ -506,8 +496,8 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevEnd);
 }
 
-template <typename DataT>
-void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
+void Dwarf5AccelTableWriter::emitEntry(
+    const DWARF5AccelTableData &Entry) const {
   std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
       getIndexForEntry(Entry);
   uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet);
@@ -537,27 +527,26 @@ void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
   }
 }
 
-template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
+void Dwarf5AccelTableWriter::emitData() const {
   Asm->OutStreamer->emitLabel(EntryPool);
   for (auto &Bucket : Contents.getBuckets()) {
     for (auto *Hash : Bucket) {
       // Remember to emit the label for our offset.
       Asm->OutStreamer->emitLabel(Hash->Sym);
       for (const auto *Value : Hash->Values)
-        emitEntry(*static_cast<const DataT *>(Value));
+        emitEntry(*static_cast<const DWARF5AccelTableData *>(Value));
       Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
       Asm->emitInt8(0);
     }
   }
 }
 
-template <typename DataT>
-Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
+Dwarf5AccelTableWriter::Dwarf5AccelTableWriter(
     AsmPrinter *Asm, const AccelTableBase &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
-    llvm::function_ref<
-        std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(const DataT &)>
+    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+        const DWARF5AccelTableData &)>
         getIndexForEntry,
     bool IsSplitDwarf)
     : AccelTableWriter(Asm, Contents, false),
@@ -570,7 +559,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
   populateAbbrevsMap();
 }
 
-template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
+void Dwarf5AccelTableWriter::emit() {
   Header.emit(*this);
   emitCUList();
   emitTUList();
@@ -635,7 +624,7 @@ void llvm::emitDWARF5AccelTable(
       DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1);
   dwarf::Form TUIndexForm =
       DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1);
-  Dwarf5AccelTableWriter<DWARF5AccelTableData>(
+  Dwarf5AccelTableWriter(
       Asm, Contents, CompUnits, TypeUnits,
       [&](const DWARF5AccelTableData &Entry)
           -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
@@ -667,8 +656,7 @@ void llvm::emitDWARF5AccelTable(
         getIndexForEntry) {
   std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
   Contents.finalize(Asm, "names");
-  Dwarf5AccelTableWriter<DWARF5AccelTableData>(Asm, Contents, CUs, TypeUnits,
-                                               getIndexForEntry, false)
+  Dwarf5AccelTableWriter(Asm, Contents, CUs, TypeUnits, getIndexForEntry, false)
       .emit();
 }
 

>From 3c9ea1ef6120116e1e65e8518b672b4826a9f80b Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan <fpiovezan at apple.com>
Date: Sat, 23 Dec 2023 12:51:49 -0300
Subject: [PATCH 2/2] [DebugNames] Implement per-entry abbreviation support

Prior to this commit, the choice of forms to use in debug_names abbreviations
was made independently of entries themselves. For example, if we wanted entries
to have a "IDX_compile_unit", all entries needed to use the same form (e.g
data2) for such IDX, even if a specific entry could fit its IDX in a single byte
(e.g. data1).

This commit changes that by creating an `AbbreviationContents` data structure to
handle the encoding of abbreviations on a per-entry basis. We don't need the
full generality of abbreviations from the debug_info section -- abbreviations in
debug_names are more limited --, but this commit is desirable for two reasons:

1. Upcoming patches may want to use different forms for different entries (there
is ongoing work for IDX_parent that will benefit from this)
2. This allows for space savings by using smaller forms.

A few tests had to be updated, as this commit changes the abbreviation number
(and therefore the order in which abbreviations are printed). Notably, this
_decreases_ the abbreviation number used, which may lead to smaller ULEB
encodings. A future patch should just renumber these to start with 1.
---
 llvm/include/llvm/CodeGen/AccelTable.h        |   6 +-
 llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp    | 200 +++++++++++++-----
 llvm/lib/DWARFLinker/DWARFStreamer.cpp        |   7 +-
 .../DWARFLinkerParallel/DWARFEmitterImpl.cpp  |   7 +-
 .../DebugInfo/Generic/debug-names-many-cu.ll  |  13 +-
 llvm/test/DebugInfo/X86/debug-names-types.ll  |  24 +--
 6 files changed, 174 insertions(+), 83 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index 6eb09f32f9f951..9098a1a821bf47 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -305,9 +305,9 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
   TUVectorTy TUSymbolsOrHashes;
 
 public:
-  struct UnitIndexAndEncoding {
+  struct UnitIndex {
     unsigned Index;
-    DWARF5AccelTableData::AttributeEncoding Encoding;
+    bool IsType;
   };
   /// Returns type units that were constructed.
   const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
@@ -366,7 +366,7 @@ void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
 void emitDWARF5AccelTable(
     AsmPrinter *Asm, DWARF5AccelTable &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
-    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
         const DWARF5AccelTableData &)>
         getIndexForEntry);
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index bf580269eca62e..6d4959e9c72084 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -179,6 +179,122 @@ class AppleAccelTableWriter : public AccelTableWriter {
 #endif
 };
 
+/// Helper enum to encode whether something is a Type or Compile unit (or none)
+/// with the minimal number of bits.
+enum class TypeOrCompileUnit : uint8_t { Compile = 0, Type = 1, None = 2 };
+
+/// Helper enum to encode DW_FORM_data{1,2,4,8} with the minimal number of
+/// bits.
+enum class FormDataLength : uint8_t {
+  Data1 = 0,
+  Data2 = 1,
+  Data4 = 2,
+  Data8 = 3
+};
+
+/// Maps `Index` into the smallest DW_FORM_data{1,2,4,8} that can represent it,
+/// and returns the corresponding `FormDataLength`.
+static FormDataLength getFormForIndex(uint32_t Index) {
+  dwarf::Form Form = DIEInteger::BestForm(false /*IsSigned*/, Index);
+  switch (Form) {
+  case dwarf::Form::DW_FORM_data1:
+    return FormDataLength::Data1;
+  case dwarf::Form::DW_FORM_data2:
+    return FormDataLength::Data2;
+  case dwarf::Form::DW_FORM_data4:
+    return FormDataLength::Data4;
+  case dwarf::Form::DW_FORM_data8:
+    return FormDataLength::Data8;
+  default:
+    llvm_unreachable("invalid getFormForIndex");
+  }
+}
+
+/// Maps a `FormDataLength` back to the corresponding DW_FORM_data{1,2,4,8}
+static dwarf::Form toDwarfDataForm(FormDataLength DataLength) {
+  switch (DataLength) {
+  case FormDataLength::Data1:
+    return dwarf::Form::DW_FORM_data1;
+  case FormDataLength::Data2:
+    return dwarf::Form::DW_FORM_data2;
+  case FormDataLength::Data4:
+    return dwarf::Form::DW_FORM_data4;
+  case FormDataLength::Data8:
+    return dwarf::Form::DW_FORM_data8;
+  }
+  llvm_unreachable("invalid toDwarfDataForm");
+}
+
+/// Converts `UnitType` and `UnitForm` into an `AttributeEncoding` and push it
+/// into `Ans`, if UnitType != None.
+void pushTypeOrCompileUnitEncondings(
+    SmallVectorImpl<DWARF5AccelTableData::AttributeEncoding> &Vec,
+    TypeOrCompileUnit UnitType, FormDataLength UnitForm) {
+  switch (UnitType) {
+  case TypeOrCompileUnit::Compile:
+    Vec.push_back({dwarf::DW_IDX_compile_unit, toDwarfDataForm(UnitForm)});
+    break;
+  case TypeOrCompileUnit::Type:
+    Vec.push_back({dwarf::DW_IDX_type_unit, toDwarfDataForm((UnitForm))});
+    break;
+  case TypeOrCompileUnit::None:
+    break;
+  }
+  return;
+}
+
+/// Represent the contents of an Abbreviation Entry for a DWARF5AccelTable, so
+/// that a set of such entries may be created.
+/// All Abbreviations have some common content:
+///    1. IDX_die_offset, with form ref4.
+///    2. IDX_{type, compile}_unit with a form DW_FORM_data{1,2,4,8}.
+/// This class doesn't encode such common elements; instead, it only encodes
+/// contents that may change from one abbreviation to another, as those define
+/// the uniqueness of each abbreviation.
+/// Common elements still show up in accessor methods (e.g. `getAttrEncodings`).
+struct AbbreviationContents {
+  uint16_t Tag : 16;
+  TypeOrCompileUnit UnitType : 2;
+  FormDataLength UnitForm : 2;
+
+  AbbreviationContents(uint16_t Tag, TypeOrCompileUnit UnitType,
+                       uint32_t UnitIndex)
+      : Tag(Tag), UnitType(UnitType), UnitForm(getFormForIndex(UnitIndex)) {}
+  AbbreviationContents(uint16_t Tag)
+      : Tag(Tag), UnitType(TypeOrCompileUnit::None),
+        UnitForm(getFormForIndex(0)) {}
+
+  uint32_t getUniqueCode() const {
+    uint32_t Bitfield = static_cast<uint32_t>(UnitType);
+    Bitfield |= static_cast<uint32_t>(UnitForm) << 2;
+    Bitfield |= static_cast<uint32_t>(Tag) << 4;
+    return Bitfield;
+  }
+
+  SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>
+  getAttrEncodings() const {
+    SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> Ans;
+    pushTypeOrCompileUnitEncondings(Ans, UnitType, UnitForm);
+    Ans.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+    return Ans;
+  }
+};
+
+struct AbbreviationContentsInfo {
+  static AbbreviationContents getEmptyKey() {
+    return AbbreviationContents(0);
+  };
+  static AbbreviationContents getTombstoneKey() {
+    return AbbreviationContents(~0);
+  };
+  static uint32_t getHashValue(AbbreviationContents Contents) {
+    return Contents.getUniqueCode();
+  }
+  static bool isEqual(AbbreviationContents LHS, AbbreviationContents RHS) {
+    return LHS.Tag == RHS.Tag && LHS.UnitType == RHS.UnitType;
+  }
+};
+
 /// Class responsible for emitting a DWARF v5 Accelerator Table. The only
 /// public function is emit(), which performs the actual emission.
 ///
@@ -207,11 +323,10 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   };
 
   Header Header;
-  DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>>
-      Abbreviations;
+  DenseSet<AbbreviationContents, AbbreviationContentsInfo> Abbreviations;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
-  llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+  llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
       const DWARF5AccelTableData &)>
       getIndexForEntry;
   MCSymbol *ContributionEnd = nullptr;
@@ -236,7 +351,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
       AsmPrinter *Asm, const AccelTableBase &Contents,
       ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
       ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
-      llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+      llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
           const DWARF5AccelTableData &)>
           getIndexForEntry,
       bool IsSplitDwarf);
@@ -395,39 +510,27 @@ void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
 }
 
-static uint32_t constexpr LowerBitSize = dwarf::DW_IDX_type_hash;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
-  return AbbrvTag >> LowerBitSize;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
+static AbbreviationContents constructAbbreviationContents(
     const unsigned Tag,
-    const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet) {
-  uint32_t AbbrvTag = 0;
-  if (EntryRet)
-    AbbrvTag |= 1 << EntryRet->Encoding.Index;
-  AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
-  AbbrvTag |= Tag << LowerBitSize;
-  return AbbrvTag;
+    const std::optional<DWARF5AccelTable::UnitIndex> &EntryRet) {
+  if (!EntryRet)
+    return AbbreviationContents(Tag);
+  return AbbreviationContents(Tag,
+                              EntryRet->IsType ? TypeOrCompileUnit::Type
+                                               : TypeOrCompileUnit::Compile,
+                              EntryRet->Index);
 }
+
 void Dwarf5AccelTableWriter::populateAbbrevsMap() {
   for (auto &Bucket : Contents.getBuckets()) {
     for (auto *Hash : Bucket) {
       for (auto *Value : Hash->Values) {
-        std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
+        std::optional<DWARF5AccelTable::UnitIndex> EntryRet =
             getIndexForEntry(*static_cast<const DWARF5AccelTableData *>(Value));
         unsigned Tag =
             static_cast<const DWARF5AccelTableData *>(Value)->getDieTag();
-        uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
-        if (Abbreviations.count(AbbrvTag) == 0) {
-          SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
-          if (EntryRet)
-            UA.push_back(EntryRet->Encoding);
-          UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
-          Abbreviations.try_emplace(AbbrvTag, UA);
-        }
+        auto AbbrevContents = constructAbbreviationContents(Tag, EntryRet);
+        Abbreviations.insert(AbbrevContents);
       }
     }
   }
@@ -479,12 +582,11 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevStart);
   for (const auto &Abbrev : Abbreviations) {
     Asm->OutStreamer->AddComment("Abbrev code");
-    uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
-    assert(Tag != 0);
-    Asm->emitULEB128(Abbrev.first);
-    Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
-    Asm->emitULEB128(Tag);
-    for (const auto &AttrEnc : Abbrev.second) {
+    assert(Abbrev.Tag != 0);
+    Asm->emitULEB128(Abbrev.getUniqueCode());
+    Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.Tag));
+    Asm->emitULEB128(Abbrev.Tag);
+    for (const auto &AttrEnc : Abbrev.getAttrEncodings()) {
       Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
       Asm->emitULEB128(AttrEnc.Form,
                        dwarf::FormEncodingString(AttrEnc.Form).data());
@@ -498,17 +600,13 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
 
 void Dwarf5AccelTableWriter::emitEntry(
     const DWARF5AccelTableData &Entry) const {
-  std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
-      getIndexForEntry(Entry);
-  uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet);
-  auto AbbrevIt = Abbreviations.find(AbbrvTag);
-  assert(AbbrevIt != Abbreviations.end() &&
+  std::optional<DWARF5AccelTable::UnitIndex> EntryRet = getIndexForEntry(Entry);
+  auto AbbrevContents = constructAbbreviationContents(Entry.getDieTag(), EntryRet);
+  assert(Abbreviations.contains(AbbrevContents) &&
          "Why wasn't this abbrev generated?");
-  assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
-         "Invalid Tag");
-  Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+  Asm->emitULEB128(AbbrevContents.getUniqueCode(), "Abbreviation code");
 
-  for (const auto &AttrEnc : AbbrevIt->second) {
+  for (const auto &AttrEnc : AbbrevContents.getAttrEncodings()) {
     Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
     switch (AttrEnc.Index) {
     case dwarf::DW_IDX_compile_unit:
@@ -545,7 +643,7 @@ Dwarf5AccelTableWriter::Dwarf5AccelTableWriter(
     AsmPrinter *Asm, const AccelTableBase &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
-    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
         const DWARF5AccelTableData &)>
         getIndexForEntry,
     bool IsSplitDwarf)
@@ -620,20 +718,14 @@ void llvm::emitDWARF5AccelTable(
       Asm->getObjFileLowering().getDwarfDebugNamesSection());
 
   Contents.finalize(Asm, "names");
-  dwarf::Form CUIndexForm =
-      DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1);
-  dwarf::Form TUIndexForm =
-      DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1);
   Dwarf5AccelTableWriter(
       Asm, Contents, CompUnits, TypeUnits,
       [&](const DWARF5AccelTableData &Entry)
-          -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+          -> std::optional<DWARF5AccelTable::UnitIndex> {
         if (Entry.isTU())
-          return {{TUIndex[Entry.getUnitID()],
-                   {dwarf::DW_IDX_type_unit, TUIndexForm}}};
+          return {{TUIndex[Entry.getUnitID()], true /*IsType*/}};
         if (CUIndex.size() > 1)
-          return {{CUIndex[Entry.getUnitID()],
-                   {dwarf::DW_IDX_compile_unit, CUIndexForm}}};
+          return {{CUIndex[Entry.getUnitID()], false /*IsType*/}};
         return std::nullopt;
       },
       DD.useSplitDwarf())
@@ -651,7 +743,7 @@ void DWARF5AccelTable::addTypeUnitSignature(DwarfTypeUnit &U) {
 void llvm::emitDWARF5AccelTable(
     AsmPrinter *Asm, DWARF5AccelTable &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
-    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
+    llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndex>(
         const DWARF5AccelTableData &)>
         getIndexForEntry) {
   std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index cd649c328ed930..80a215bb216cdf 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -306,18 +306,15 @@ void DwarfStreamer::emitDebugNames(DWARF5AccelTable &Table) {
   }
 
   Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
-  dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false,
-                                          (uint64_t)UniqueIdToCuMap.size() - 1);
   /// llvm-dwarfutil doesn't support type units + .debug_names right now.
   // FIXME: add support for type units + .debug_names. For now the behavior is
   // unsuported.
   emitDWARF5AccelTable(
       Asm.get(), Table, CompUnits,
       [&](const DWARF5AccelTableData &Entry)
-          -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+          -> std::optional<DWARF5AccelTable::UnitIndex> {
         if (UniqueIdToCuMap.size() > 1)
-          return {{UniqueIdToCuMap[Entry.getUnitID()],
-                   {dwarf::DW_IDX_compile_unit, Form}}};
+          return {{UniqueIdToCuMap[Entry.getUnitID()], false /*IsType*/}};
         return std::nullopt;
       });
 }
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
index 355cfae3a64629..7ae7a5db3a9ddc 100644
--- a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
+++ b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
@@ -230,17 +230,14 @@ void DwarfEmitterImpl::emitDebugNames(DWARF5AccelTable &Table,
     return;
 
   Asm->OutStreamer->switchSection(MOFI->getDwarfDebugNamesSection());
-  dwarf::Form Form =
-      DIEInteger::BestForm(/*IsSigned*/ false, (uint64_t)CUidToIdx.size() - 1);
   // FIXME: add support for type units + .debug_names. For now the behavior is
   // unsuported.
   emitDWARF5AccelTable(
       Asm.get(), Table, CUOffsets,
       [&](const DWARF5AccelTableData &Entry)
-          -> std::optional<DWARF5AccelTable::UnitIndexAndEncoding> {
+          -> std::optional<DWARF5AccelTable::UnitIndex> {
         if (CUidToIdx.size() > 1)
-          return {{CUidToIdx[Entry.getUnitID()],
-                   {dwarf::DW_IDX_compile_unit, Form}}};
+          return {{CUidToIdx[Entry.getUnitID()], false /*IsType*/}};
         return std::nullopt;
       });
 }
diff --git a/llvm/test/DebugInfo/Generic/debug-names-many-cu.ll b/llvm/test/DebugInfo/Generic/debug-names-many-cu.ll
index 698d0cba67a67d..8b6fd734efc627 100644
--- a/llvm/test/DebugInfo/Generic/debug-names-many-cu.ll
+++ b/llvm/test/DebugInfo/Generic/debug-names-many-cu.ll
@@ -14,21 +14,26 @@
 ; ...
 ; CHECK: CU[256]: 0x{{[0-9a-f]*}}
 
-; CHECK: Abbreviation [[ABBREV:0x[0-9a-f]*]]
+; CHECK: Abbreviation [[ABBREV_1BYTE:0x[0-9a-f]*]]
+; CHECK-NEXT: Tag: DW_TAG_variable
+; CHECK-NEXT: DW_IDX_compile_unit: DW_FORM_data1
+; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+
+; CHECK: Abbreviation [[ABBREV_2BYTES:0x[0-9a-f]*]]
 ; CHECK-NEXT: Tag: DW_TAG_variable
 ; CHECK-NEXT: DW_IDX_compile_unit: DW_FORM_data2
 ; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
 
 ; CHECK: String: 0x{{[0-9a-f]*}} "foobar1"
 ; CHECK-NEXT: Entry
-; CHECK-NEXT: Abbrev: [[ABBREV]]
+; CHECK-NEXT: Abbrev: [[ABBREV_1BYTE]]
 ; CHECK-NEXT: Tag: DW_TAG_variable
-; CHECK-NEXT: DW_IDX_compile_unit: 0x0000
+; CHECK-NEXT: DW_IDX_compile_unit: 0x00
 ; CHECK-NEXT: DW_IDX_die_offset: 0x{{[0-9a-f]*}}
 
 ; CHECK: String: 0x{{[0-9a-f]*}} "foobar257"
 ; CHECK-NEXT: Entry
-; CHECK-NEXT: Abbrev: [[ABBREV]]
+; CHECK-NEXT: Abbrev: [[ABBREV_2BYTES]]
 ; CHECK-NEXT: Tag: DW_TAG_variable
 ; CHECK-NEXT: DW_IDX_compile_unit: 0x0100
 ; CHECK-NEXT: DW_IDX_die_offset: 0x{{[0-9a-f]*}}
diff --git a/llvm/test/DebugInfo/X86/debug-names-types.ll b/llvm/test/DebugInfo/X86/debug-names-types.ll
index 501b7efd88eb9a..c0e5fb2a7c0237 100644
--- a/llvm/test/DebugInfo/X86/debug-names-types.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-types.ll
@@ -37,8 +37,8 @@
 ; CHECK-NEXT:        LocalTU[0]: 0x00000000
 ; CHECK-NEXT:      ]
 ; CHECK:        Abbreviations [
-; CHECK-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
-; CHECK-NEXT:       Tag: DW_TAG_structure_type
+; CHECK-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
+; CHECK-NEXT:       Tag: DW_TAG_base_type
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:     }
 ; CHECK-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
@@ -46,17 +46,17 @@
 ; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:     }
-; CHECK-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
+; CHECK-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; CHECK-NEXT:       Tag: DW_TAG_base_type
+; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:     }
 ; CHECK-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
 ; CHECK-NEXT:       Tag: DW_TAG_subprogram
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:     }
-; CHECK-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
-; CHECK-NEXT:       Tag: DW_TAG_base_type
-; CHECK-NEXT:       DW_IDX_type_unit: DW_FORM_data1
+; CHECK-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
+; CHECK-NEXT:       Tag: DW_TAG_structure_type
 ; CHECK-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-NEXT:     }
 ; CHECK-NEXT:   ]
@@ -130,8 +130,8 @@
 ; CHECK-SPLIT-NEXT:     ForeignTU[0]: 0x675d23e4f33235f2
 ; CHECK-SPLIT-NEXT:   ]
 ; CHECK-SPLIT-NEXT:   Abbreviations [
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT:       Tag: DW_TAG_structure_type
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:     }
 ; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
@@ -139,17 +139,17 @@
 ; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:     }
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
 ; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:     }
 ; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
 ; CHECK-SPLIT-NEXT:       Tag: DW_TAG_subprogram
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:     }
-; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT:       Tag: DW_TAG_base_type
-; CHECK-SPLIT-NEXT:       DW_IDX_type_unit: DW_FORM_data1
+; CHECK-SPLIT-NEXT:     Abbreviation [[ABBREV:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT:       Tag: DW_TAG_structure_type
 ; CHECK-SPLIT-NEXT:       DW_IDX_die_offset: DW_FORM_ref4
 ; CHECK-SPLIT-NEXT:     }
 ; CHECK-SPLIT-NEXT:   ]



More information about the llvm-commits mailing list