[llvm] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit abbrevs (PR #108757)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 07:10:30 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld-macho
Author: Daniel Bertalan (BertalanD)
<details>
<summary>Changes</summary>
In `DWARFDebugInfoEntry::extractFast`, we were parsing all abbreviation declarations belonging to the compilation unit by calling the `getAbbreviations` method. This resulted in a large overhead (mostly vector resizes and ULEB128 parsing) in cases where only the Compilation Unit DIE ended up being used.
As `DW_TAG_compile_unit` typically comes first in the abbreviation table, this commit adds a fast-path function (`tryExtractCUAbbrevFast`) which attempts to read only the first abbreviation, without constructing a full `DWARFAbbreviationDeclarationSet`.
This significantly speeds up `ld64.lld`'s generation of `N_OSO` stab information (which needs `DW_AT_name` from the Compilation Unit DIE). The following measurement was taken on an M1 Mac Mini linking Chromium with full debug info:
x: before
+: after
N Min Max Median Avg Stddev
x 15 3.136759 4.390569 3.5234511 3.6028554 0.38726359
+ 15 2.7222703 3.5872169 3.237128 3.1830136 0.31002649
Difference at 95.0% confidence
-0.419842 +/- 0.26232
-11.653% +/- 7.28088%
(Student's t, pooled s = 0.350777)
---
Full diff: https://github.com/llvm/llvm-project/pull/108757.diff
5 Files Affected:
- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h (+4)
- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h (+4)
- (modified) llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp (+18)
- (modified) llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp (+35-22)
- (modified) llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp (+11)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 6439827ef70f0f..18555bafdc1f01 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -62,6 +62,7 @@ class DWARFDebugAbbrev {
mutable DWARFAbbreviationDeclarationSetMap AbbrDeclSets;
mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos;
mutable std::optional<DataExtractor> Data;
+ mutable std::map<uint64_t, DWARFAbbreviationDeclaration> CUAbbrevs;
public:
DWARFDebugAbbrev(DataExtractor Data);
@@ -69,6 +70,9 @@ class DWARFDebugAbbrev {
Expected<const DWARFAbbreviationDeclarationSet *>
getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
+ Expected<const DWARFAbbreviationDeclaration *>
+ tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const;
+
void dump(raw_ostream &OS) const;
Error parse() const;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea893123..87f8742fd9d9f0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -419,6 +419,10 @@ class DWARFUnit {
uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+ /// Extracts only the abbreviation declaration with code 1, which is
+ /// typically the compile unit DIE (DW_TAG_compile_unit).
+ const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+
const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 85959ecc5e17f1..7944fc881e6bd1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -168,3 +168,21 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
.first;
return &PrevAbbrOffsetPos->second;
}
+
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
+ if (auto AbbrevDecl = CUAbbrevs.find(CUAbbrOffset);
+ AbbrevDecl != CUAbbrevs.end())
+ return &AbbrevDecl->second;
+
+ DWARFAbbreviationDeclaration Decl;
+ uint64_t Offset = CUAbbrOffset;
+ Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
+ Decl.extract(*Data, &Offset);
+ if (!ES)
+ return ES.takeError();
+ if (Decl.getCode() != 1)
+ return nullptr;
+
+ return &(CUAbbrevs[CUAbbrOffset] = std::move(Decl));
+}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 4f0a6d96ace9e2..030faad13f46f6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -34,36 +34,49 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
return false;
}
assert(DebugInfoData.isValidOffset(UEndOffset - 1));
+ AbbrevDecl = nullptr;
+
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
if (0 == AbbrCode) {
// NULL debug tag entry.
- AbbrevDecl = nullptr;
return true;
}
- const auto *AbbrevSet = U.getAbbreviations();
- if (!AbbrevSet) {
- U.getContext().getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "contains invalid abbreviation set offset 0x%" PRIx64,
- U.getOffset(), U.getAbbreviationsOffset()));
- // Restore the original offset.
- *OffsetPtr = Offset;
- return false;
+
+ // Fast path: parsing the entire abbreviation table is wasteful if we only
+ // need the unit DIE (typically AbbrCode == 1).
+ if (1 == AbbrCode) {
+ AbbrevDecl = U.tryExtractCUAbbrevFast();
+ assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
}
- AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
if (!AbbrevDecl) {
- U.getContext().getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "contains invalid abbreviation %" PRIu64 " at "
- "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
- U.getOffset(), AbbrCode, *OffsetPtr,
- AbbrevSet->getCodeRange().c_str()));
- // Restore the original offset.
- *OffsetPtr = Offset;
- return false;
+ const auto *AbbrevSet = U.getAbbreviations();
+ if (!AbbrevSet) {
+ U.getContext().getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation set offset 0x%" PRIx64,
+ U.getOffset(), U.getAbbreviationsOffset()));
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
+ AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
+ if (!AbbrevDecl) {
+ U.getContext().getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation %" PRIu64 " at "
+ "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+ U.getOffset(), AbbrCode, *OffsetPtr,
+ AbbrevSet->getCodeRange().c_str()));
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
}
+
// See if all attributes in this DIE have fixed byte sizes. If so, we can
// just add this size to the offset to skip to the next DIE.
if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557bd..dcf323525b10ee 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,6 +1051,17 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
return nullptr;
}
+const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
+ Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
+ Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
+ if (!AbbrevOrError) {
+ // FIXME: We should propagate this error upwards.
+ consumeError(AbbrevOrError.takeError());
+ return nullptr;
+ }
+ return *AbbrevOrError;
+}
+
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
if (!Abbrevs) {
Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError =
``````````
</details>
https://github.com/llvm/llvm-project/pull/108757
More information about the llvm-commits mailing list