[llvm] [LLVM][DWARF] Change .debug_names abbrev to be an index (PR #81200)
Alexander Yermolovich via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 22:10:11 PST 2024
https://github.com/ayermolo updated https://github.com/llvm/llvm-project/pull/81200
>From f8b29b14e1e33ea843d32d348fde880e8be7e5b5 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 8 Feb 2024 14:21:09 -0800
Subject: [PATCH 1/6] [LLVM][DWARF] Change .debug_names abbrev to be an index
Based on the discussion in https://github.com/llvm/llvm-project/pull/80229,
changed the implementation to align with how .debug_abbrev is handled, so that
the .debug_names abbrev tag is a monotonically increasing index. This allows
tools like LLDB to access it in constant time.
---
llvm/include/llvm/CodeGen/AccelTable.h | 43 ++++++++
llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 104 ++++++++----------
.../test/DebugInfo/X86/debug-names-dwarf64.ll | 10 +-
llvm/test/DebugInfo/X86/debug-names-types.ll | 42 +++----
4 files changed, 115 insertions(+), 84 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index e6a661696354b7..38da67636bae85 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -360,6 +360,49 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
unsigned Index;
DWARF5AccelTableData::AttributeEncoding Encoding;
};
+ union AbbrevDescriptor {
+ struct {
+ uint32_t CompUnit : 1;
+ uint32_t TypeUnit : 1;
+ uint32_t DieOffset : 1;
+ uint32_t Parent : 2;
+ uint32_t TypeHash : 1;
+ uint32_t Tag : 26;
+ } Bits;
+ uint32_t Value = 0;
+ };
+ struct TagIndex {
+ uint32_t DieTag;
+ uint32_t Index;
+ };
+ struct cmpByTagIndex {
+ bool operator()(const TagIndex &LHS, const TagIndex &RHS) const {
+ return LHS.Index < RHS.Index;
+ }
+ };
+ enum IdxParentEncoding : uint8_t {
+ NoIndexedParent =
+ 0, /// Parent information present but parent isn't indexed.
+ Ref4 = 1, /// Parent information present and parent is indexed.
+ NoParent = 2, /// Parent information missing.
+ };
+
+ /// Returns DW_IDX_parent abbrev encoding for the given form.
+ static uint8_t
+ encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
+ if (!MaybeParentForm)
+ return NoParent;
+ switch (*MaybeParentForm) {
+ case dwarf::Form::DW_FORM_flag_present:
+ return NoIndexedParent;
+ case dwarf::Form::DW_FORM_ref4:
+ return Ref4;
+ default:
+ // This is not crashing on bad input: we should only reach this if the
+ // internal compiler logic is faulty; see getFormForIdxParent.
+ llvm_unreachable("Bad form for IDX_parent");
+ }
+ }
/// Returns type units that were constructed.
const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
/// Add a type unit start symbol.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 1024aabf2ab0f6..a30a1393ba58f6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -208,7 +208,9 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
};
Header Header;
- DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>>
+ std::map<DWARF5AccelTable::TagIndex,
+ SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>,
+ DWARF5AccelTable::cmpByTagIndex>
Abbreviations;
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
@@ -223,6 +225,15 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
bool IsSplitDwarf = false;
/// Stores the DIE offsets which are indexed by this table.
DenseSet<OffsetAndUnitID> IndexedOffsets;
+ /// Mapping between AbbrevTag and Index.
+ std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
+
+ /// Constructs and returns a unique AbbrevTag that captures what a DIE
+ /// accesses.
+ DWARF5AccelTable::TagIndex getAbbrevIndex(
+ const unsigned DieTag,
+ const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
+ const std::optional<dwarf::Form> &MaybeParentForm);
void populateAbbrevsMap();
@@ -234,7 +245,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
void emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const;
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols);
void emitData();
public:
@@ -409,49 +420,30 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
return {};
}
-enum IdxParentEncoding : uint8_t {
- NoIndexedParent = 0, /// Parent information present but parent isn't indexed.
- Ref4 = 1, /// Parent information present and parent is indexed.
- NoParent = 2, /// Parent information missing.
-};
-
-static uint32_t constexpr NumBitsIdxParent = 2;
-
-uint8_t encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
- if (!MaybeParentForm)
- return NoParent;
- switch (*MaybeParentForm) {
- case dwarf::Form::DW_FORM_flag_present:
- return NoIndexedParent;
- case dwarf::Form::DW_FORM_ref4:
- return Ref4;
- default:
- // This is not crashing on bad input: we should only reach this if the
- // internal compiler logic is faulty; see getFormForIdxParent.
- llvm_unreachable("Bad form for IDX_parent");
- }
-}
-
-static uint32_t constexpr ParentBitOffset = dwarf::DW_IDX_type_hash;
-static uint32_t constexpr TagBitOffset = ParentBitOffset + NumBitsIdxParent;
-static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
- return AbbrvTag >> TagBitOffset;
-}
-
-/// Constructs a unique AbbrevTag that captures what a DIE accesses.
-/// Using this tag we can emit a unique abbreviation for each DIE.
-static uint32_t constructAbbreviationTag(
- const unsigned Tag,
+DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
+ const unsigned DieTag,
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
- std::optional<dwarf::Form> MaybeParentForm) {
- uint32_t AbbrvTag = 0;
- if (EntryRet)
- AbbrvTag |= 1 << EntryRet->Encoding.Index;
- AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
- AbbrvTag |= 1 << dwarf::DW_IDX_parent;
- AbbrvTag |= encodeIdxParent(MaybeParentForm) << ParentBitOffset;
- AbbrvTag |= Tag << TagBitOffset;
- return AbbrvTag;
+ const std::optional<dwarf::Form> &MaybeParentForm) {
+ DWARF5AccelTable::AbbrevDescriptor AbbrvDesc;
+ if (EntryRet) {
+ switch (EntryRet->Encoding.Index) {
+ case dwarf::DW_IDX_compile_unit:
+ AbbrvDesc.Bits.CompUnit = true;
+ break;
+ case dwarf::DW_IDX_type_unit:
+ AbbrvDesc.Bits.TypeUnit = true;
+ break;
+ default:
+ llvm_unreachable("Invalid encoding index");
+ break;
+ }
+ }
+ AbbrvDesc.Bits.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
+ AbbrvDesc.Bits.DieOffset = true;
+ AbbrvDesc.Bits.Tag = DieTag;
+ auto Iter = AbbrevTagToIndexMap.insert(
+ {AbbrvDesc.Value, static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
+ return {DieTag, Iter.first->second};
}
static std::optional<dwarf::Form>
@@ -476,8 +468,8 @@ void Dwarf5AccelTableWriter::populateAbbrevsMap() {
unsigned Tag = Value->getDieTag();
std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
IndexedOffsets, Value->getParentDieOffsetAndUnitID());
- uint32_t AbbrvTag =
- constructAbbreviationTag(Tag, EntryRet, MaybeParentForm);
+ const DWARF5AccelTable::TagIndex AbbrvTag =
+ getAbbrevIndex(Tag, EntryRet, MaybeParentForm);
if (Abbreviations.count(AbbrvTag) == 0) {
SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
if (EntryRet)
@@ -538,11 +530,9 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
Asm->OutStreamer->emitLabel(AbbrevStart);
for (const auto &Abbrev : Abbreviations) {
Asm->OutStreamer->AddComment("Abbrev code");
- uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
- assert(Tag != 0);
- Asm->emitULEB128(Abbrev.first);
- Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
- Asm->emitULEB128(Tag);
+ Asm->emitULEB128(Abbrev.first.Index);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first.DieTag));
+ Asm->emitULEB128(Abbrev.first.DieTag);
for (const auto &AttrEnc : Abbrev.second) {
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
Asm->emitULEB128(AttrEnc.Form,
@@ -558,20 +548,18 @@ void Dwarf5AccelTableWriter::emitAbbrevs() const {
void Dwarf5AccelTableWriter::emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
- DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) const {
+ DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(Entry);
std::optional<OffsetAndUnitID> MaybeParentOffset =
Entry.getParentDieOffsetAndUnitID();
std::optional<dwarf::Form> MaybeParentForm =
getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
- uint32_t AbbrvTag =
- constructAbbreviationTag(Entry.getDieTag(), EntryRet, MaybeParentForm);
- auto AbbrevIt = Abbreviations.find(AbbrvTag);
+ const DWARF5AccelTable::TagIndex TagIndexVal =
+ getAbbrevIndex(Entry.getDieTag(), EntryRet, MaybeParentForm);
+ auto AbbrevIt = Abbreviations.find(TagIndexVal);
assert(AbbrevIt != Abbreviations.end() &&
"Why wasn't this abbrev generated?");
- assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
- "Invalid Tag");
auto EntrySymbolIt =
DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
@@ -584,7 +572,7 @@ void Dwarf5AccelTableWriter::emitEntry(
if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
Asm->OutStreamer->emitLabel(EntrySymbol);
- Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+ Asm->emitULEB128(TagIndexVal.Index, "Abbreviation code");
for (const auto &AttrEnc : AbbrevIt->second) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
diff --git a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
index c15e2ad1d56b0c..9a5fd073358733 100644
--- a/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-dwarf64.ll
@@ -30,11 +30,6 @@
; CHECK-NEXT: CU[0]: 0x00000000
; CHECK-NEXT: ]
; CHECK-NEXT: Abbreviations [
-; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_label
-; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4
-; CHECK-NEXT: }
; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
; CHECK-NEXT: Tag: DW_TAG_base_type
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
@@ -50,6 +45,11 @@
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
+; CHECK-NEXT: Abbreviation [[ABBREV_LABEL:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_label
+; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT: DW_IDX_parent: DW_FORM_ref4
+; CHECK-NEXT: }
; CHECK-NEXT: ]
; CHECK-NEXT: Bucket 0 [
; CHECK-NEXT: Name 1 {
diff --git a/llvm/test/DebugInfo/X86/debug-names-types.ll b/llvm/test/DebugInfo/X86/debug-names-types.ll
index f41bb5524b9c33..ff0d4d52c1f071 100644
--- a/llvm/test/DebugInfo/X86/debug-names-types.ll
+++ b/llvm/test/DebugInfo/X86/debug-names-types.ll
@@ -37,20 +37,14 @@
; CHECK-NEXT: LocalTU[0]: 0x00000000
; CHECK-NEXT: ]
; CHECK: Abbreviations [
-; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_structure_type
-; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
-; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
-; CHECK-NEXT: }
-; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
; CHECK-NEXT: Tag: DW_TAG_base_type
-; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
-; CHECK-NEXT: Abbreviation [[ABBREV:0x[0-9a-f]*]] {
-; CHECK-NEXT: Tag: DW_TAG_base_type
+; CHECK-NEXT: Abbreviation [[ABBREV3:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_structure_type
+; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
@@ -64,6 +58,12 @@
; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-NEXT: }
+; CHECK-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-NEXT: Tag: DW_TAG_base_type
+; CHECK-NEXT: DW_IDX_type_unit: DW_FORM_data1
+; CHECK-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-NEXT: DW_IDX_parent: DW_FORM_flag_present
+; CHECK-NEXT: }
; CHECK-NEXT: ]
; CHECK-NEXT: Bucket 0 [
; CHECK-NEXT: Name 1 {
@@ -130,7 +130,7 @@
; CHECK-SPLIT: Foreign TU count: 1
; CHECK-SPLIT-NEXT: Bucket count: 4
; CHECK-SPLIT-NEXT: Name count: 4
-; CHECK-SPLIT-NEXT: Abbreviations table size: 0x32
+; CHECK-SPLIT-NEXT: Abbreviations table size: 0x2D
; CHECK-SPLIT-NEXT: Augmentation: 'LLVM0700'
; CHECK-SPLIT-NEXT: }
; CHECK-SPLIT-NEXT: Compilation Unit offsets [
@@ -140,20 +140,14 @@
; CHECK-SPLIT-NEXT: ForeignTU[0]: 0x675d23e4f33235f2
; CHECK-SPLIT-NEXT: ]
; CHECK-SPLIT-NEXT: Abbreviations [
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type
-; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
-; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
-; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
-; CHECK-SPLIT-NEXT: }
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
-; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
-; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV2:0x[0-9a-f]*]] {
-; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV1:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Tag: DW_TAG_structure_type
+; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
@@ -167,6 +161,12 @@
; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
; CHECK-SPLIT-NEXT: }
+; CHECK-SPLIT-NEXT: Abbreviation [[ABBREV4:0x[0-9a-f]*]] {
+; CHECK-SPLIT-NEXT: Tag: DW_TAG_base_type
+; CHECK-SPLIT-NEXT: DW_IDX_type_unit: DW_FORM_data1
+; CHECK-SPLIT-NEXT: DW_IDX_die_offset: DW_FORM_ref4
+; CHECK-SPLIT-NEXT: DW_IDX_parent: DW_FORM_flag_present
+; CHECK-SPLIT-NEXT: }
; CHECK-SPLIT-NEXT: ]
; CHECK-SPLIT-NEXT: Bucket 0 [
; CHECK-SPLIT-NEXT: Name 1 {
>From b767251e86ee40351e49810b58a6206d74751839 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 8 Feb 2024 18:45:38 -0800
Subject: [PATCH 2/6] removed union, changed to DenseMap
---
llvm/include/llvm/CodeGen/AccelTable.h | 18 ++++++++----------
llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 15 ++++++++-------
.../ARM/dwarf5-dwarf4-combination-macho.test | 2 +-
3 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index 38da67636bae85..ee318eb2640cc4 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -360,16 +360,14 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
unsigned Index;
DWARF5AccelTableData::AttributeEncoding Encoding;
};
- union AbbrevDescriptor {
- struct {
- uint32_t CompUnit : 1;
- uint32_t TypeUnit : 1;
- uint32_t DieOffset : 1;
- uint32_t Parent : 2;
- uint32_t TypeHash : 1;
- uint32_t Tag : 26;
- } Bits;
- uint32_t Value = 0;
+
+ struct AbbrevDescriptor {
+ uint32_t CompUnit : 1;
+ uint32_t TypeUnit : 1;
+ uint32_t DieOffset : 1;
+ uint32_t Parent : 2;
+ uint32_t TypeHash : 1;
+ uint32_t Tag : 26;
};
struct TagIndex {
uint32_t DieTag;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index a30a1393ba58f6..63f0cb5546c352 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -226,7 +226,7 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
/// Stores the DIE offsets which are indexed by this table.
DenseSet<OffsetAndUnitID> IndexedOffsets;
/// Mapping between AbbrevTag and Index.
- std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
+ DenseMap<uint32_t, uint32_t> AbbrevTagToIndexMap;
/// Constructs and returns a unique AbbrevTag that captures what a DIE
/// accesses.
@@ -425,24 +425,25 @@ DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
const std::optional<dwarf::Form> &MaybeParentForm) {
DWARF5AccelTable::AbbrevDescriptor AbbrvDesc;
+ memset(&AbbrvDesc, 0, sizeof AbbrvDesc);
if (EntryRet) {
switch (EntryRet->Encoding.Index) {
case dwarf::DW_IDX_compile_unit:
- AbbrvDesc.Bits.CompUnit = true;
+ AbbrvDesc.CompUnit = true;
break;
case dwarf::DW_IDX_type_unit:
- AbbrvDesc.Bits.TypeUnit = true;
+ AbbrvDesc.TypeUnit = true;
break;
default:
llvm_unreachable("Invalid encoding index");
break;
}
}
- AbbrvDesc.Bits.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
- AbbrvDesc.Bits.DieOffset = true;
- AbbrvDesc.Bits.Tag = DieTag;
+ AbbrvDesc.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
+ AbbrvDesc.DieOffset = true;
+ AbbrvDesc.Tag = DieTag;
auto Iter = AbbrevTagToIndexMap.insert(
- {AbbrvDesc.Value, static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
+ {bit_cast<std::uint32_t>(AbbrvDesc), static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
return {DieTag, Iter.first->second};
}
diff --git a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
index 5a37b4247b5bf3..e80e76ba8f8136 100644
--- a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
+++ b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
@@ -221,7 +221,7 @@ CHECK-NEXT: Name Index @ 0x0 {
CHECK-NEXT: Header {
; FIXME: when the parallel dwarf linker is able to generate DW_IDX_parent,
; these headers should be the same.
-WITH-PARENTS-NEXT: Length: 0xC8
+WITH-PARENTS-NEXT: Length: 0xC0
NO-PARENTS-NEXT: Length: 0xC4
CHECK-NEXT: Format: DWARF32
CHECK-NEXT: Version: 5
>From d77d99ee51e2c80b6159b4cf4d1398ec91517768 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 8 Feb 2024 18:52:04 -0800
Subject: [PATCH 3/6] format
---
llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 63f0cb5546c352..064d3d8bf689cc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -443,7 +443,8 @@ DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
AbbrvDesc.DieOffset = true;
AbbrvDesc.Tag = DieTag;
auto Iter = AbbrevTagToIndexMap.insert(
- {bit_cast<std::uint32_t>(AbbrvDesc), static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
+ {bit_cast<uint32_t>(AbbrvDesc),
+ static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
return {DieTag, Iter.first->second};
}
>From 17a9ae383f3bfb9127db50d7e569c72e0f18e583 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Thu, 8 Feb 2024 19:40:39 -0800
Subject: [PATCH 4/6] updated test
---
.../tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
index e80e76ba8f8136..1d24a3ae8b3cc0 100644
--- a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
+++ b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
@@ -230,7 +230,7 @@ CHECK-NEXT: Local TU count: 0
CHECK-NEXT: Foreign TU count: 0
CHECK-NEXT: Bucket count: 5
CHECK-NEXT: Name count: 5
-WITH-PARENTS-NEXT: Abbreviations table size: 0x17
+WITH-PARENTS-NEXT: Abbreviations table size: 0x15
NO-PARENTS-NEXT: Abbreviations table size: 0x13
CHECK-NEXT: Augmentation: 'LLVM0700'
CHECK-NEXT: }
>From a1e412af2bf4bf613021f72205f249ab2469f08b Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Fri, 9 Feb 2024 07:32:43 -0800
Subject: [PATCH 5/6] updated test
---
.../tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
index 1d24a3ae8b3cc0..fb15f46ce0b245 100644
--- a/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
+++ b/llvm/test/tools/dsymutil/ARM/dwarf5-dwarf4-combination-macho.test
@@ -222,7 +222,7 @@ CHECK-NEXT: Header {
; FIXME: when the parallel dwarf linker is able to generate DW_IDX_parent,
; these headers should be the same.
WITH-PARENTS-NEXT: Length: 0xC0
-NO-PARENTS-NEXT: Length: 0xC4
+NO-PARENTS-NEXT: Length: 0xBC
CHECK-NEXT: Format: DWARF32
CHECK-NEXT: Version: 5
CHECK-NEXT: CU count: 2
@@ -231,6 +231,6 @@ CHECK-NEXT: Foreign TU count: 0
CHECK-NEXT: Bucket count: 5
CHECK-NEXT: Name count: 5
WITH-PARENTS-NEXT: Abbreviations table size: 0x15
-NO-PARENTS-NEXT: Abbreviations table size: 0x13
+NO-PARENTS-NEXT: Abbreviations table size: 0x11
CHECK-NEXT: Augmentation: 'LLVM0700'
CHECK-NEXT: }
>From fe25bb48fdd42224ddde253a38b8037077752e88 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich <ayermolo at meta.com>
Date: Sun, 11 Feb 2024 22:08:58 -0800
Subject: [PATCH 6/6] Changed to set
---
llvm/include/llvm/CodeGen/AccelTable.h | 90 +++++++---------
llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 119 +++++++++------------
2 files changed, 90 insertions(+), 119 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index ee318eb2640cc4..5e0e100f96ae92 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -275,11 +275,6 @@ struct DenseMapInfo<OffsetAndUnitID> : DenseMapInfo<OffsetAndUnitID::Base> {};
/// emitDWARF5AccelTable function.
class DWARF5AccelTableData : public AccelTableData {
public:
- struct AttributeEncoding {
- dwarf::Index Index;
- dwarf::Form Form;
- };
-
static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID,
@@ -289,7 +284,7 @@ class DWARF5AccelTableData : public AccelTableData {
const unsigned DieTag, const unsigned UnitID,
const bool IsTU = false)
: OffsetVal(DieOffset), ParentOffset(DefiningParentOffset),
- DieTag(DieTag), UnitID(UnitID), IsTU(IsTU) {}
+ DieTag(DieTag), AbbrevID(0), IsTU(IsTU), UnitID(UnitID) {}
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
@@ -330,6 +325,12 @@ class DWARF5AccelTableData : public AccelTableData {
return OffsetAndUnitID(*ParentOffset, getUnitID());
}
+ /// Sets AbbrevIndex for an Entry.
+ void setAbbrevIndex(uint16_t AbbrevIndex) { AbbrevID = AbbrevIndex; }
+
+ /// Returns AbbrevIndex for an Entry.
+ uint16_t getAbbrevIndex() const { return AbbrevID; }
+
/// If `Die` has a non-null parent and the parent is not a declaration,
/// return its offset.
static std::optional<uint64_t> getDefiningParentDieOffset(const DIE &Die);
@@ -338,12 +339,42 @@ class DWARF5AccelTableData : public AccelTableData {
std::variant<const DIE *, uint64_t> OffsetVal;
std::optional<uint64_t> ParentOffset;
uint32_t DieTag : 16;
- uint32_t UnitID : 15;
+ uint32_t AbbrevID : 15;
uint32_t IsTU : 1;
-
+ uint32_t UnitID;
uint64_t order() const override { return getDieOffset(); }
};
+class DebugNamesAbbrev : public FoldingSetNode {
+public:
+ uint32_t DieTag;
+ uint32_t Index;
+ struct AttributeEncoding {
+ dwarf::Index Index;
+ dwarf::Form Form;
+ };
+ DebugNamesAbbrev(uint32_t DieTag) : DieTag(DieTag) {}
+ /// Add attribute encoding to an abbreviation.
+ void addAttribute(const DebugNamesAbbrev::AttributeEncoding &Attr) {
+ AttrVect.push_back(Attr);
+ }
+ /// Set abbreviation tag index.
+ void setAbbrevTagIndex(uint32_t AbbrevIndex) { Index = AbbrevIndex; }
+ /// Get abbreviation tag index.
+ uint32_t getAbbrevTagIndex() const { return Index; }
+ /// Get DIE Tag.
+ uint32_t getDieTag() const { return DieTag; }
+ /// Used to gather unique data for the abbreviation folding set.
+ void Profile(FoldingSetNodeID &ID) const;
+ /// Returns attributes for an abbreviation.
+ const SmallVector<AttributeEncoding, 1> &getAttributes() const {
+ return AttrVect;
+ }
+
+private:
+ SmallVector<AttributeEncoding, 1> AttrVect;
+};
+
struct TypeUnitMetaInfo {
// Symbol for start of the TU section or signature if this is SplitDwarf.
std::variant<MCSymbol *, uint64_t> LabelOrSignature;
@@ -358,49 +389,8 @@ class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
public:
struct UnitIndexAndEncoding {
unsigned Index;
- DWARF5AccelTableData::AttributeEncoding Encoding;
- };
-
- struct AbbrevDescriptor {
- uint32_t CompUnit : 1;
- uint32_t TypeUnit : 1;
- uint32_t DieOffset : 1;
- uint32_t Parent : 2;
- uint32_t TypeHash : 1;
- uint32_t Tag : 26;
- };
- struct TagIndex {
- uint32_t DieTag;
- uint32_t Index;
+ DebugNamesAbbrev::AttributeEncoding Encoding;
};
- struct cmpByTagIndex {
- bool operator()(const TagIndex &LHS, const TagIndex &RHS) const {
- return LHS.Index < RHS.Index;
- }
- };
- enum IdxParentEncoding : uint8_t {
- NoIndexedParent =
- 0, /// Parent information present but parent isn't indexed.
- Ref4 = 1, /// Parent information present and parent is indexed.
- NoParent = 2, /// Parent information missing.
- };
-
- /// Returns DW_IDX_parent abbrev encoding for the given form.
- static uint8_t
- encodeIdxParent(const std::optional<dwarf::Form> MaybeParentForm) {
- if (!MaybeParentForm)
- return NoParent;
- switch (*MaybeParentForm) {
- case dwarf::Form::DW_FORM_flag_present:
- return NoIndexedParent;
- case dwarf::Form::DW_FORM_ref4:
- return Ref4;
- default:
- // This is not crashing on bad input: we should only reach this if the
- // internal compiler logic is faulty; see getFormForIdxParent.
- llvm_unreachable("Bad form for IDX_parent");
- }
- }
/// Returns type units that were constructed.
const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
/// Add a type unit start symbol.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 064d3d8bf689cc..1d34295e565c56 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -208,10 +208,13 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
};
Header Header;
- std::map<DWARF5AccelTable::TagIndex,
- SmallVector<DWARF5AccelTableData::AttributeEncoding, 3>,
- DWARF5AccelTable::cmpByTagIndex>
- Abbreviations;
+ /// FoldingSet that uniques the abbreviations.
+ FoldingSet<DebugNamesAbbrev> AbbreviationsSet;
+ /// Vector containing DebugNames abbreviations for iteration in order.
+ SmallVector<DebugNamesAbbrev *, 5> AbbreviationsVector;
+ /// The bump allocator to use when creating DebugNamesAbbrev objects in the
+ /// uniqued storage container.
+ BumpPtrAllocator Alloc;
ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
@@ -225,15 +228,6 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
bool IsSplitDwarf = false;
/// Stores the DIE offsets which are indexed by this table.
DenseSet<OffsetAndUnitID> IndexedOffsets;
- /// Mapping between AbbrevTag and Index.
- DenseMap<uint32_t, uint32_t> AbbrevTagToIndexMap;
-
- /// Constructs and returns a unique AbbrevTag that captures what a DIE
- /// accesses.
- DWARF5AccelTable::TagIndex getAbbrevIndex(
- const unsigned DieTag,
- const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
- const std::optional<dwarf::Form> &MaybeParentForm);
void populateAbbrevsMap();
@@ -381,7 +375,7 @@ void AppleAccelTableWriter::emit() const {
DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
const uint32_t UnitID,
const bool IsTU)
- : OffsetVal(&Die), DieTag(Die.getTag()), UnitID(UnitID), IsTU(IsTU) {}
+ : OffsetVal(&Die), DieTag(Die.getTag()), IsTU(IsTU), UnitID(UnitID) {}
void Dwarf5AccelTableWriter::Header::emit(Dwarf5AccelTableWriter &Ctx) {
assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -420,34 +414,6 @@ DWARF5AccelTableData::getDefiningParentDieOffset(const DIE &Die) {
return {};
}
-DWARF5AccelTable::TagIndex Dwarf5AccelTableWriter::getAbbrevIndex(
- const unsigned DieTag,
- const std::optional<DWARF5AccelTable::UnitIndexAndEncoding> &EntryRet,
- const std::optional<dwarf::Form> &MaybeParentForm) {
- DWARF5AccelTable::AbbrevDescriptor AbbrvDesc;
- memset(&AbbrvDesc, 0, sizeof AbbrvDesc);
- if (EntryRet) {
- switch (EntryRet->Encoding.Index) {
- case dwarf::DW_IDX_compile_unit:
- AbbrvDesc.CompUnit = true;
- break;
- case dwarf::DW_IDX_type_unit:
- AbbrvDesc.TypeUnit = true;
- break;
- default:
- llvm_unreachable("Invalid encoding index");
- break;
- }
- }
- AbbrvDesc.Parent = DWARF5AccelTable::encodeIdxParent(MaybeParentForm);
- AbbrvDesc.DieOffset = true;
- AbbrvDesc.Tag = DieTag;
- auto Iter = AbbrevTagToIndexMap.insert(
- {bit_cast<uint32_t>(AbbrvDesc),
- static_cast<uint32_t>(AbbrevTagToIndexMap.size() + 1)});
- return {DieTag, Iter.first->second};
-}
-
static std::optional<dwarf::Form>
getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
std::optional<OffsetAndUnitID> ParentOffset) {
@@ -461,26 +427,44 @@ getFormForIdxParent(const DenseSet<OffsetAndUnitID> &IndexedOffsets,
return dwarf::Form::DW_FORM_flag_present;
}
+void DebugNamesAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(DieTag);
+ for (const DebugNamesAbbrev::AttributeEncoding &Enc : AttrVect) {
+ ID.AddInteger(Enc.Index);
+ if (Enc.Index == dwarf::DW_IDX_parent)
+ ID.AddInteger(Enc.Form);
+ }
+}
+
void Dwarf5AccelTableWriter::populateAbbrevsMap() {
for (auto &Bucket : Contents.getBuckets()) {
for (auto *Hash : Bucket) {
- for (auto *Value : Hash->getValues<DWARF5AccelTableData *>()) {
+ for (DWARF5AccelTableData *Value :
+ Hash->getValues<DWARF5AccelTableData *>()) {
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(*Value);
- unsigned Tag = Value->getDieTag();
std::optional<dwarf::Form> MaybeParentForm = getFormForIdxParent(
IndexedOffsets, Value->getParentDieOffsetAndUnitID());
- const DWARF5AccelTable::TagIndex AbbrvTag =
- getAbbrevIndex(Tag, EntryRet, MaybeParentForm);
- if (Abbreviations.count(AbbrvTag) == 0) {
- SmallVector<DWARF5AccelTableData::AttributeEncoding, 3> UA;
- if (EntryRet)
- UA.push_back(EntryRet->Encoding);
- UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
- if (MaybeParentForm)
- UA.push_back({dwarf::DW_IDX_parent, *MaybeParentForm});
- Abbreviations.try_emplace(AbbrvTag, UA);
+ DebugNamesAbbrev Abbrev(Value->getDieTag());
+ if (EntryRet)
+ Abbrev.addAttribute(EntryRet->Encoding);
+ Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+ if (MaybeParentForm)
+ Abbrev.addAttribute({dwarf::DW_IDX_parent, *MaybeParentForm});
+ FoldingSetNodeID ID;
+ Abbrev.Profile(ID);
+ void *InsertPos;
+ if (DebugNamesAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Value->setAbbrevIndex(Existing->getAbbrevTagIndex());
+ continue;
}
+ DebugNamesAbbrev *NewAbbrev =
+ new (Alloc) DebugNamesAbbrev(std::move(Abbrev));
+ NewAbbrev->setAbbrevTagIndex(AbbreviationsVector.size());
+ AbbreviationsVector.push_back(NewAbbrev);
+ AbbreviationsSet.InsertNode(NewAbbrev, InsertPos);
+ Value->setAbbrevIndex(NewAbbrev->getAbbrevTagIndex());
}
}
}
@@ -530,12 +514,13 @@ void Dwarf5AccelTableWriter::emitStringOffsets() const {
void Dwarf5AccelTableWriter::emitAbbrevs() const {
Asm->OutStreamer->emitLabel(AbbrevStart);
- for (const auto &Abbrev : Abbreviations) {
+ for (const DebugNamesAbbrev *Abbrev : AbbreviationsVector) {
Asm->OutStreamer->AddComment("Abbrev code");
- Asm->emitULEB128(Abbrev.first.Index);
- Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first.DieTag));
- Asm->emitULEB128(Abbrev.first.DieTag);
- for (const auto &AttrEnc : Abbrev.second) {
+ Asm->emitULEB128(Abbrev->getAbbrevTagIndex() + 1);
+ Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev->getDieTag()));
+ Asm->emitULEB128(Abbrev->getDieTag());
+ for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+ Abbrev->getAttributes()) {
Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
Asm->emitULEB128(AttrEnc.Form,
dwarf::FormEncodingString(AttrEnc.Form).data());
@@ -551,18 +536,13 @@ void Dwarf5AccelTableWriter::emitEntry(
const DWARF5AccelTableData &Entry,
const DenseMap<OffsetAndUnitID, MCSymbol *> &DIEOffsetToAccelEntryLabel,
DenseSet<MCSymbol *> &EmittedAccelEntrySymbols) {
+ assert(Entry.getAbbrevIndex() < AbbreviationsVector.size() &&
+ "Entry abbrev index is outside of abbreviations vector range.");
+ DebugNamesAbbrev *Abbrev = AbbreviationsVector[Entry.getAbbrevIndex()];
std::optional<DWARF5AccelTable::UnitIndexAndEncoding> EntryRet =
getIndexForEntry(Entry);
std::optional<OffsetAndUnitID> MaybeParentOffset =
Entry.getParentDieOffsetAndUnitID();
- std::optional<dwarf::Form> MaybeParentForm =
- getFormForIdxParent(IndexedOffsets, MaybeParentOffset);
- const DWARF5AccelTable::TagIndex TagIndexVal =
- getAbbrevIndex(Entry.getDieTag(), EntryRet, MaybeParentForm);
- auto AbbrevIt = Abbreviations.find(TagIndexVal);
- assert(AbbrevIt != Abbreviations.end() &&
- "Why wasn't this abbrev generated?");
-
auto EntrySymbolIt =
DIEOffsetToAccelEntryLabel.find(Entry.getDieOffsetAndUnitID());
assert(EntrySymbolIt != DIEOffsetToAccelEntryLabel.end());
@@ -574,9 +554,10 @@ void Dwarf5AccelTableWriter::emitEntry(
if (EmittedAccelEntrySymbols.insert(EntrySymbol).second)
Asm->OutStreamer->emitLabel(EntrySymbol);
- Asm->emitULEB128(TagIndexVal.Index, "Abbreviation code");
+ Asm->emitULEB128(Entry.getAbbrevIndex() + 1, "Abbreviation code");
- for (const auto &AttrEnc : AbbrevIt->second) {
+ for (const DebugNamesAbbrev::AttributeEncoding &AttrEnc :
+ Abbrev->getAttributes()) {
Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
switch (AttrEnc.Index) {
case dwarf::DW_IDX_compile_unit:
More information about the llvm-commits
mailing list