[llvm] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit abbrevs (PR #108757)

Daniel Bertalan via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 01:42:34 PDT 2024


https://github.com/BertalanD updated https://github.com/llvm/llvm-project/pull/108757

>From d3c7baeeb8d9b2a64344683c5b92f108198e9de7 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sun, 15 Sep 2024 12:33:35 +0200
Subject: [PATCH 1/3] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit
 abbrevs

In `DWARFDebugInfoEntry::extractFast`, we were parsing all abbreviation
declarations belonging to the compilation unit by calling the
`getAbbreviations` method. This resulted in a large overhead (mostly
vector resizes and ULEB128 parsing) in cases where only the Compilation
Unit DIE ended up being used.

As `DW_TAG_compile_unit` typically comes first in the abbreviation
table, this commit adds a fast-path function (`tryExtractCUAbbrevFast`)
which attempts to read only the first abbreviation, without constructing
a full `DWARFAbbreviationDeclarationSet`.

This significantly speeds up `ld64.lld`'s generation of `N_OSO` stab
information (which needs `DW_AT_name` from the Compilation Unit DIE).
The following measurement was taken on an M1 Mac Mini linking Chromium
with full debug info:

  x: before
  +: after

      N           Min           Max        Median           Avg        Stddev
  x  15      3.136759      4.390569     3.5234511     3.6028554    0.38726359
  +  15     2.7222703     3.5872169      3.237128     3.1830136    0.31002649
  Difference at 95.0% confidence
      -0.419842 +/- 0.26232
      -11.653% +/- 7.28088%
      (Student's t, pooled s = 0.350777)
---
 .../llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h   |  4 ++
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h |  4 ++
 llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 18 ++++++
 .../DebugInfo/DWARF/DWARFDebugInfoEntry.cpp   | 57 ++++++++++++-------
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        | 11 ++++
 5 files changed, 72 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 6439827ef70f0f..18555bafdc1f01 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -62,6 +62,7 @@ class DWARFDebugAbbrev {
   mutable DWARFAbbreviationDeclarationSetMap AbbrDeclSets;
   mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos;
   mutable std::optional<DataExtractor> Data;
+  mutable std::map<uint64_t, DWARFAbbreviationDeclaration> CUAbbrevs;
 
 public:
   DWARFDebugAbbrev(DataExtractor Data);
@@ -69,6 +70,9 @@ class DWARFDebugAbbrev {
   Expected<const DWARFAbbreviationDeclarationSet *>
   getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
 
+  Expected<const DWARFAbbreviationDeclaration *>
+  tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const;
+
   void dump(raw_ostream &OS) const;
   Error parse() const;
 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea893123..87f8742fd9d9f0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -419,6 +419,10 @@ class DWARFUnit {
 
   uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
 
+  /// Extracts only the abbreviation declaration with code 1, which is
+  /// typically the compile unit DIE (DW_TAG_compile_unit).
+  const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 85959ecc5e17f1..7944fc881e6bd1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -168,3 +168,21 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
           .first;
   return &PrevAbbrOffsetPos->second;
 }
+
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
+  if (auto AbbrevDecl = CUAbbrevs.find(CUAbbrOffset);
+      AbbrevDecl != CUAbbrevs.end())
+    return &AbbrevDecl->second;
+
+  DWARFAbbreviationDeclaration Decl;
+  uint64_t Offset = CUAbbrOffset;
+  Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
+      Decl.extract(*Data, &Offset);
+  if (!ES)
+    return ES.takeError();
+  if (Decl.getCode() != 1)
+    return nullptr;
+
+  return &(CUAbbrevs[CUAbbrOffset] = std::move(Decl));
+}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 4f0a6d96ace9e2..030faad13f46f6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -34,36 +34,49 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
     return false;
   }
   assert(DebugInfoData.isValidOffset(UEndOffset - 1));
+  AbbrevDecl = nullptr;
+
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
   if (0 == AbbrCode) {
     // NULL debug tag entry.
-    AbbrevDecl = nullptr;
     return true;
   }
-  const auto *AbbrevSet = U.getAbbreviations();
-  if (!AbbrevSet) {
-    U.getContext().getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF unit at offset 0x%8.8" PRIx64 " "
-                          "contains invalid abbreviation set offset 0x%" PRIx64,
-                          U.getOffset(), U.getAbbreviationsOffset()));
-    // Restore the original offset.
-    *OffsetPtr = Offset;
-    return false;
+
+  // Fast path: parsing the entire abbreviation table is wasteful if we only
+  // need the unit DIE (typically AbbrCode == 1).
+  if (1 == AbbrCode) {
+    AbbrevDecl = U.tryExtractCUAbbrevFast();
+    assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
   }
-  AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
   if (!AbbrevDecl) {
-    U.getContext().getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF unit at offset 0x%8.8" PRIx64 " "
-                          "contains invalid abbreviation %" PRIu64 " at "
-                          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
-                          U.getOffset(), AbbrCode, *OffsetPtr,
-                          AbbrevSet->getCodeRange().c_str()));
-    // Restore the original offset.
-    *OffsetPtr = Offset;
-    return false;
+    const auto *AbbrevSet = U.getAbbreviations();
+    if (!AbbrevSet) {
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit at offset 0x%8.8" PRIx64 " "
+          "contains invalid abbreviation set offset 0x%" PRIx64,
+          U.getOffset(), U.getAbbreviationsOffset()));
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
+    AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
+    if (!AbbrevDecl) {
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit at offset 0x%8.8" PRIx64 " "
+          "contains invalid abbreviation %" PRIu64 " at "
+          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+          U.getOffset(), AbbrCode, *OffsetPtr,
+          AbbrevSet->getCodeRange().c_str()));
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
   }
+
   // See if all attributes in this DIE have fixed byte sizes. If so, we can
   // just add this size to the offset to skip to the next DIE.
   if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557bd..dcf323525b10ee 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,6 +1051,17 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
   return nullptr;
 }
 
+const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
+  Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
+      Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
+  if (!AbbrevOrError) {
+    // FIXME: We should propagate this error upwards.
+    consumeError(AbbrevOrError.takeError());
+    return nullptr;
+  }
+  return *AbbrevOrError;
+}
+
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   if (!Abbrevs) {
     Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError =

>From ca8106a8c2f36e9251cf85e81a8fd81ad0ca2e2d Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Sun, 15 Sep 2024 22:02:59 +0200
Subject: [PATCH 2/3] Don't crash on empty optional deref

---
 llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 7944fc881e6bd1..70fd5c1a8ce331 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -175,6 +175,10 @@ DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
       AbbrevDecl != CUAbbrevs.end())
     return &AbbrevDecl->second;
 
+  if (!Data || CUAbbrOffset >= Data->getData().size())
+    return make_error<llvm::object::GenericBinaryError>(
+        "the abbreviation offset into the .debug_abbrev section is not valid");
+
   DWARFAbbreviationDeclaration Decl;
   uint64_t Offset = CUAbbrOffset;
   Expected<DWARFAbbreviationDeclaration::ExtractState> ES =

>From ba0d7f4165d9091b30f5293596f147abd6bf4545 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani at danielbertalan.dev>
Date: Tue, 17 Sep 2024 10:38:38 +0200
Subject: [PATCH 3/3] Propagate error from tryExtractCUAbbrevFast()

... but still drop it on the floor in the end, as we want to re-try the
full DIE parsing.
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h    |  2 +-
 llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 13 ++++++++++---
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp           | 12 +++---------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 87f8742fd9d9f0..8c019841035a12 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -421,7 +421,7 @@ class DWARFUnit {
 
   /// Extracts only the abbreviation declaration with code 1, which is
   /// typically the compile unit DIE (DW_TAG_compile_unit).
-  const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+  Expected<const DWARFAbbreviationDeclaration *> tryExtractCUAbbrevFast() const;
 
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 030faad13f46f6..c5156e55393f27 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -13,6 +13,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
 #include <cstddef>
 #include <cstdint>
 
@@ -44,9 +45,13 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
 
   // Fast path: parsing the entire abbreviation table is wasteful if we only
   // need the unit DIE (typically AbbrCode == 1).
-  if (1 == AbbrCode) {
-    AbbrevDecl = U.tryExtractCUAbbrevFast();
-    assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
+  if (AbbrCode == 1) {
+    Expected<const DWARFAbbreviationDeclaration *> DeclOrError =
+        U.tryExtractCUAbbrevFast();
+    if (DeclOrError)
+      AbbrevDecl = *DeclOrError;
+    else
+      consumeError(DeclOrError.takeError()); // Retry full parsing below.
   }
 
   if (!AbbrevDecl) {
@@ -77,6 +82,8 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
     }
   }
 
+  assert(AbbrevDecl && AbbrevDecl->getCode() == AbbrCode);
+
   // See if all attributes in this DIE have fixed byte sizes. If so, we can
   // just add this size to the offset to skip to the next DIE.
   if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index dcf323525b10ee..aab3f5f0783044 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,15 +1051,9 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
   return nullptr;
 }
 
-const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
-  Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
-      Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
-  if (!AbbrevOrError) {
-    // FIXME: We should propagate this error upwards.
-    consumeError(AbbrevOrError.takeError());
-    return nullptr;
-  }
-  return *AbbrevOrError;
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFUnit::tryExtractCUAbbrevFast() const {
+  return Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
 }
 
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {



More information about the llvm-commits mailing list