[llvm] [LLVM][DWARF] Change .debug_names abbrev to be an index (PR #81200)
Alexander Yermolovich via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 14:30:35 PST 2024
https://github.com/ayermolo created https://github.com/llvm/llvm-project/pull/81200
Based on the discussion in https://github.com/llvm/llvm-project/pull/80229,
this changes the implementation to align with how .debug_abbrev is handled, so
that the .debug_names abbrev tag is a monotonically increasing index. This
allows tools like LLDB to access an abbreviation in constant time.
From f8b29b14e1e33ea843d32d348fde880e8be7e5b5 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 8 Feb 2024 14:21:09 -0800
Subject: [PATCH] [LLVM][DWARF] Change .debug_names abbrev to be an index
Based on the discussion in https://github.com/llvm/llvm-project/pull/80229,
this changes the implementation to align with how .debug_abbrev is handled, so
that the .debug_names abbrev tag is a monotonically increasing index. This
allows tools like LLDB to access an abbreviation in constant time.
---
llvm/include/llvm/CodeGen/AccelTable.h | 43 ++++++++
llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 104 ++++++++----------
.../test/DebugInfo/X86/debug-names-dwarf64.ll | 10 +-
llvm/test/DebugInfo/X86/debug-names-types.ll | 42 +++----
4 files changed, 115 insertions(+), 84 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index e6a661696354b7..38da67636bae85 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -360,6 +360,49 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
unsigned Index;
DWARF5AccelTableData::AttributeEncoding Encoding;
};
+ union AbbrevDescriptor {
+ struct {
+ uint32_t CompUnit : 1;
+ uint32_t TypeUnit : 1;
+ uint32_t DieOffset : 1;
+ uint32_t Parent : 2;
+ uint32_t TypeHash : 1;
+ uint32_t Tag : 26;
+ } Bits;
+ uint32_t Value = 0;
+ };
+ struct TagIndex {
+ uint32_t DieTag;
+ uint32_t Index;
+ };
+ struct cmpByTagIndex {
+ bool operator()(const TagIndex &LHS, const TagIndex &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+ enum IdxParentEncoding : uint8_t {
+ NoIndexedParent =
+ 0, /// Parent information present but parent isn't indexed.
+ Ref4 = 1, /// Parent information present and parent is indexed.
+ NoParent = 2, /// Parent information missing.
+ };
+
+ /// Returns DW_IDX_parent abbrev encoding for the given form.
+ static uint8_t
+ encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
+ if (!MaybeParentForm)
+ return NoParent;
+ switch (*MaybeParentForm) {
+ case dwarf::Form::DW_FORM_flag_present:
+ return NoIndexedParent;
+ case dwarf::Form::DW_FORM_ref4:
+ return Ref4;
+ default:
+ // This is not crashing on bad input: we should only reach this if the
+ // internal compiler logic is faulty; see getFormForIdxParent.
+ llvm_unreachable("Bad form for IDX_parent");
+ }
+ }
/// Returns type units that were constructed.
const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
/// Add a type unit start symbol.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 1024aabf2ab0f6..a30a1393ba58f6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -208,7 +208,9 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
};
Header Header;
- DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>>
+ std::map<DWARF5AccelTable::TagIndex,
+ SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>,
+ DWARF5AccelTable::cmpByTagIndex>
Abbreviations;
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
@@ -223,6 +225,15 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
bool IsSplitDwarf = false;
/// Stores the DIE offsets which are indexed by this table.
DenseSet<OffsetAndUnitID> IndexedOffsets;
+ /// Mapping between AbbrevTag and Index.
+ std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
+
+ /// Constructs and returns a unique AbbrevTag that captures what a DIE
+ /// accesses.
+ DWARF5AccelTable::TagIndex getAbbrevIndex(
+ const unsigned DieTag,
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
+ const std::optional<dwarf::Form> &MaybeParentForm);
void populateAbbrevsMap();
@@ -234,7 +245,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
void emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const;
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols);
void emitData();
public:
@@ -409,49 +420,30 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
return {};
}
-enum IdxParentEncoding : uint8_t {
- NoIndexedParent = 0, /// Parent information present but parent isn't indexed.
- Ref4 = 1, /// Parent information present and parent is indexed.
- NoParent = 2, /// Parent information missing.
-};
-
-static uint32_t constexpr NumBitsIdxParent = 2;
-
-uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
- if (!MaybeParentForm)
- return NoParent;
- switch (*MaybeParentForm) {
- case dwarf::Form::DW_FORM_flag_present:
- return NoIndexedParent;
- case dwarf::Form::DW_FORM_ref4:
- return Ref4;
- default:
- // This is not crashing on bad input: we should only reach this if the
- // internal compiler logic is faulty; see getFormForIdxParent.
- llvm_unreachable("Bad form for IDX_parent");
- }
-}
-
-static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash;
-static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
- return AbbrvTag >> TagBitOffset;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
- const unsigned Tag,
+DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
+ const unsigned DieTag,
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
- std::optional<dwarf::Form> MaybeParentForm) {
- uint32_t AbbrvTag = 0;
- if (EntryRet)
- AbbrvTag |= 1 << EntryRet->Encoding.Index;
- AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
- AbbrvTag |= 1 << dwarf::DW_IDX_parent;
- AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset;
- AbbrvTag |= Tag << TagBitOffset;
- return AbbrvTag;
+ const std::optional<dwarf::Form> &MaybeParentForm) {
+ DWARF5AccelTable::AbbrevDescriptor AbbrvDesc;
+ if (EntryRet) {
+ switch (EntryRet->Encoding.Index) {
+ case dwarf::DW_IDX_compile_unit:
+ AbbrvDesc.Bits.CompUnit = true;
+ break;
+ case dwarf::DW_IDX_type_unit:
+ AbbrvDesc.Bits.TypeUnit = true;
+ break;
+ default:
+ llvm_unreachable("Invalid encoding index");
+ break;
+ }
+ }
+ AbbrvDesc.Bits.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
+ AbbrvDesc.Bits.DieOffset = true;
+ AbbrvDesc.Bits.Tag = DieTag;
+ auto Iter = AbbrevTagToIndexMap.insert(
+ {AbbrvDesc.Value, static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
+ return {DieTag, Iter.first->second};
}
static std::optional<dwarf::Form>
@@ -476,8 +468,8 @@ void Dwarf5AccelTableWriter::populateAbbrevsMap() {
unsigned Tag = Value->getDieTag();
std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
IndexedOffsets, Value->getParentDieOffsetAndUnitID());
- uint32_t AbbrvTag =
- constructAbbreviationTag(Tag, EntryRet, MaybeParentForm);
+ const DWARF5AccelTable::TagIndex AbbrvTag =
+ getAbbrevIndex(Tag, EntryRet, MaybeParentForm);
if (Abbreviations.count(AbbrvTag) == 0) {
SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
if (EntryRet)
@@ -538,11 +530,9 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
Asm->OutStreamer->emitLabel(AbbrevStart);
for (const auto &Abbrev : Abbreviations) {
Asm->OutStreamer->AddComment("Abbrev code");
- uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
- assert(Tag != 0);
- Asm->emitULEB128(Abbrev.first);
- Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
- Asm->emitULEB128(Tag);
+ Asm->emitULEB128(Abbrev.first.Index);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first.DieTag));
+ Asm->emitULEB128(Abbrev.first.DieTag);
for (const auto &AttrEnc : Abbrev.second) {
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
Asm->emitULEB128(AttrEnc.Form,
@@ -558,20 +548,18 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
void Dwarf5AccelTableWriter::emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const {
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(Entry);
std::optional<OffsetAndUnitID> MaybeParentOffset =
Entry.getParentDieOffsetAndUnitID();
std::optional<dwarf::Form> MaybeParentForm =
getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
- uint32_t AbbrvTag =
- constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm);
- auto AbbrevIt = Abbreviations.find(AbbrvTag);
+ const DWARF5AccelTable::TagIndex TagIndexVal =
+ getAbbrevIndex(Entry.getDieTag(), EntryRet, MaybeParentForm);
+ auto AbbrevIt = Abbreviations.find(TagIndexVal);
assert(AbbrevIt != Abbreviations.end() &&
"Why wasn't this abbrev generated?");
- assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
- "Invalid Tag");
auto EntrySymbolIt =
DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
@@ -584,7 +572,7 @@ void Dwarf5AccelTableWriter::emitEntry(
if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
Asm->OutStreamer->emitLabel(EntrySymbol);
- Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+ Asm->emitULEB128(TagIndexVal.Index, "Abbreviation code");
for (const auto &AttrEnc : AbbrevIt->second) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
diff --git a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
index c15e2ad1d56b0c..9a5fd073358733 100644
--- a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
@@ -30,11 +30,6 @@
; CHECK-NEXT: CU[0]: 0x00000000
; CHECK-NEXT: ]
; CHECK-NEXT: Abbreviations [
-; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_label
-; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4
-; CHECK-NEXT: }
; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
; CHECK-NEXT: Tag: DW_TAG_base_type
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
@@ -50,6 +45,11 @@
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
+; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_label
+; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4
+; CHECK-NEXT: }
; CHECK-NEXT: ]
; CHECK-NEXT: Bucket 0 [
; CHECK-NEXT: Name 1 {
diff --git a/llvm/test/DebugInfo/X86/debug-names-types.ll b/llvm/test/DebugInfo/X86/debug-names-types.ll
index f41bb5524b9c33..ff0d4d52c1f071 100644
--- a/llvm/test/DebugInfo/X86/debug-names-types.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-types.ll
@@ -37,20 +37,14 @@
; CHECK-NEXT: LocalTU[0]: 0x00000000
; CHECK-NEXT: ]
; CHECK: Abbreviations [
-; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_structure_type
-; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
-; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
-; CHECK-NEXT: }
-; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
; CHECK-NEXT: Tag: DW_TAG_base_type
-; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
-; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_base_type
+; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_structure_type
+; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
@@ -64,6 +58,12 @@
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
+; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_base_type
+; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
+; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
+; CHECK-NEXT: }
; CHECK-NEXT: ]
; CHECK-NEXT: Bucket 0 [
; CHECK-NEXT: Name 1 {
@@ -130,7 +130,7 @@
; CHECK-SPLIT: Foreign TU count: 1
; CHECK-SPLIT-NEXT: Bucket count: 4
; CHECK-SPLIT-NEXT: Name count: 4
-; CHECK-SPLIT-NEXT: Abbreviations table size: 0x32
+; CHECK-SPLIT-NEXT: Abbreviations table size: 0x2D
; CHECK-SPLIT-NEXT: Augmentation: 'LLVM0700'
; CHECK-SPLIT-NEXT: }
; CHECK-SPLIT-NEXT: Compilation Unit offsets [
@@ -140,20 +140,14 @@
; CHECK-SPLIT-NEXT: ForeignTU[0]: 0x675d23e4f33235f2
; CHECK-SPLIT-NEXT: ]
; CHECK-SPLIT-NEXT: Abbreviations [
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type
-; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
-; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
-; CHECK-SPLIT-NEXT: }
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
-; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type
+; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
@@ -167,6 +161,12 @@
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
+; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
+; CHECK-SPLIT-NEXT: }
; CHECK-SPLIT-NEXT: ]
; CHECK-SPLIT-NEXT: Bucket 0 [
; CHECK-SPLIT-NEXT: Name 1 {
More information about the llvm-commits
mailing list