[llvm] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit abbrevs (PR #108757)
Daniel Bertalan via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 01:42:34 PDT 2024
https://github.com/BertalanD updated https://github.com/llvm/llvm-project/pull/108757
>From d3c7baeeb8d9b2a64344683c5b92f108198e9de7 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sun, 15 Sep 2024 12:33:35 +0200
Subject: [PATCH 1/3] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit
abbrevs
In `DWARFDebugInfoEntry::extractFast`, we were parsing all abbreviation
declarations belonging to the compilation unit by calling the
`getAbbreviations` method. This resulted in a large overhead (mostly
vector resizes and ULEB128 parsing) in cases where only the Compilation
Unit DIE ended up being used.
As `DW_TAG_compile_unit` typically comes first in the abbreviation
table, this commit adds a fast-path function (`tryExtractCUAbbrevFast`)
which attempts to read only the first abbreviation, without constructing
a full `DWARFAbbreviationDeclarationSet`.
This significantly speeds up `ld64.lld`'s generation of `N_OSO` stab
information (which needs `DW_AT_name` from the Compilation Unit DIE).
The following measurement was taken on an M1 Mac Mini linking Chromium
with full debug info:
x: before
+: after
N Min Max Median Avg Stddev
x 15 3.136759 4.390569 3.5234511 3.6028554 0.38726359
+ 15 2.7222703 3.5872169 3.237128 3.1830136 0.31002649
Difference at 95.0% confidence
-0.419842 +/- 0.26232
-11.653% +/- 7.28088%
(Student's t, pooled s = 0.350777)
---
.../llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h | 4 ++
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 4 ++
llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 18 ++++++
.../DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 57 ++++++++++++-------
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 11 ++++
5 files changed, 72 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 6439827ef70f0f..18555bafdc1f01 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -62,6 +62,7 @@ class DWARFDebugAbbrev {
mutable DWARFAbbreviationDeclarationSetMap AbbrDeclSets;
mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos;
mutable std::optional<DataExtractor> Data;
+ mutable std::map<uint64_t, DWARFAbbreviationDeclaration> CUAbbrevs;
public:
DWARFDebugAbbrev(DataExtractor Data);
@@ -69,6 +70,9 @@ class DWARFDebugAbbrev {
Expected<const DWARFAbbreviationDeclarationSet *>
getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
+ Expected<const DWARFAbbreviationDeclaration *>
+ tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const;
+
void dump(raw_ostream &OS) const;
Error parse() const;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea893123..87f8742fd9d9f0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -419,6 +419,10 @@ class DWARFUnit {
uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
+ /// Extracts only the abbreviation declaration with code 1, which is
+ /// typically the compile unit DIE (DW_TAG_compile_unit).
+ const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+
const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 85959ecc5e17f1..7944fc881e6bd1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -168,3 +168,21 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
.first;
return &PrevAbbrOffsetPos->second;
}
+
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
+ if (auto AbbrevDecl = CUAbbrevs.find(CUAbbrOffset);
+ AbbrevDecl != CUAbbrevs.end())
+ return &AbbrevDecl->second;
+
+ DWARFAbbreviationDeclaration Decl;
+ uint64_t Offset = CUAbbrOffset;
+ Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
+ Decl.extract(*Data, &Offset);
+ if (!ES)
+ return ES.takeError();
+ if (Decl.getCode() != 1)
+ return nullptr;
+
+ return &(CUAbbrevs[CUAbbrOffset] = std::move(Decl));
+}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 4f0a6d96ace9e2..030faad13f46f6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -34,36 +34,49 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
return false;
}
assert(DebugInfoData.isValidOffset(UEndOffset - 1));
+ AbbrevDecl = nullptr;
+
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
if (0 == AbbrCode) {
// NULL debug tag entry.
- AbbrevDecl = nullptr;
return true;
}
- const auto *AbbrevSet = U.getAbbreviations();
- if (!AbbrevSet) {
- U.getContext().getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "contains invalid abbreviation set offset 0x%" PRIx64,
- U.getOffset(), U.getAbbreviationsOffset()));
- // Restore the original offset.
- *OffsetPtr = Offset;
- return false;
+
+ // Fast path: parsing the entire abbreviation table is wasteful if we only
+ // need the unit DIE (typically AbbrCode == 1).
+ if (1 == AbbrCode) {
+ AbbrevDecl = U.tryExtractCUAbbrevFast();
+ assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
}
- AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
if (!AbbrevDecl) {
- U.getContext().getWarningHandler()(
- createStringError(errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "contains invalid abbreviation %" PRIu64 " at "
- "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
- U.getOffset(), AbbrCode, *OffsetPtr,
- AbbrevSet->getCodeRange().c_str()));
- // Restore the original offset.
- *OffsetPtr = Offset;
- return false;
+ const auto *AbbrevSet = U.getAbbreviations();
+ if (!AbbrevSet) {
+ U.getContext().getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation set offset 0x%" PRIx64,
+ U.getOffset(), U.getAbbreviationsOffset()));
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
+ AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
+ if (!AbbrevDecl) {
+ U.getContext().getWarningHandler()(createStringError(
+ errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64 " "
+ "contains invalid abbreviation %" PRIu64 " at "
+ "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+ U.getOffset(), AbbrCode, *OffsetPtr,
+ AbbrevSet->getCodeRange().c_str()));
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
}
+
// See if all attributes in this DIE have fixed byte sizes. If so, we can
// just add this size to the offset to skip to the next DIE.
if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557bd..dcf323525b10ee 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,6 +1051,17 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
return nullptr;
}
+const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
+ Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
+ Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
+ if (!AbbrevOrError) {
+ // FIXME: We should propagate this error upwards.
+ consumeError(AbbrevOrError.takeError());
+ return nullptr;
+ }
+ return *AbbrevOrError;
+}
+
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
if (!Abbrevs) {
Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError =
>From ca8106a8c2f36e9251cf85e81a8fd81ad0ca2e2d Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sun, 15 Sep 2024 22:02:59 +0200
Subject: [PATCH 2/3] Don't crash on empty optional deref
---
llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 7944fc881e6bd1..70fd5c1a8ce331 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -175,6 +175,10 @@ DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
AbbrevDecl != CUAbbrevs.end())
return &AbbrevDecl->second;
+ if (!Data || CUAbbrOffset >= Data->getData().size())
+ return make_error<llvm::object::GenericBinaryError>(
+ "the abbreviation offset into the .debug_abbrev section is not valid");
+
DWARFAbbreviationDeclaration Decl;
uint64_t Offset = CUAbbrOffset;
Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
>From ba0d7f4165d9091b30f5293596f147abd6bf4545 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Tue, 17 Sep 2024 10:38:38 +0200
Subject: [PATCH 3/3] Propagate error from tryExtractCUAbbrevFast()
... but still drop it on the floor in the end, as we want to re-try the
full DIE parsing.
---
llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 2 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 13 ++++++++++---
llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 12 +++---------
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 87f8742fd9d9f0..8c019841035a12 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -421,7 +421,7 @@ class DWARFUnit {
/// Extracts only the abbreviation declaration with code 1, which is
/// typically the compile unit DIE (DW_TAG_compile_unit).
- const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+ Expected<const DWARFAbbreviationDeclaration *> tryExtractCUAbbrevFast() const;
const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 030faad13f46f6..c5156e55393f27 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
#include <cstddef>
#include <cstdint>
@@ -44,9 +45,13 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
// Fast path: parsing the entire abbreviation table is wasteful if we only
// need the unit DIE (typically AbbrCode == 1).
- if (1 == AbbrCode) {
- AbbrevDecl = U.tryExtractCUAbbrevFast();
- assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
+ if (AbbrCode == 1) {
+ Expected<const DWARFAbbreviationDeclaration *> DeclOrError =
+ U.tryExtractCUAbbrevFast();
+ if (DeclOrError)
+ AbbrevDecl = *DeclOrError;
+ else
+ consumeError(DeclOrError.takeError()); // Retry full parsing below.
}
if (!AbbrevDecl) {
@@ -77,6 +82,8 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
}
}
+ assert(AbbrevDecl && AbbrevDecl->getCode() == AbbrCode);
+
// See if all attributes in this DIE have fixed byte sizes. If so, we can
// just add this size to the offset to skip to the next DIE.
if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index dcf323525b10ee..aab3f5f0783044 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,15 +1051,9 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
return nullptr;
}
-const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
- Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
- Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
- if (!AbbrevOrError) {
- // FIXME: We should propagate this error upwards.
- consumeError(AbbrevOrError.takeError());
- return nullptr;
- }
- return *AbbrevOrError;
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFUnit::tryExtractCUAbbrevFast() const {
+ return Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
}
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
More information about the llvm-commits
mailing list