[llvm] [DebugInfo] Add fast path for parsing DW_TAG_compile_unit abbrevs (PR #108757)

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 15 07:10:30 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld-macho

Author: Daniel Bertalan (BertalanD)

<details>
<summary>Changes</summary>

In `DWARFDebugInfoEntry::extractFast`, we were parsing all abbreviation declarations belonging to the compilation unit by calling the `getAbbreviations` method. This resulted in significant overhead (mostly vector resizes and ULEB128 parsing) in cases where only the Compilation Unit DIE ended up being used.

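As `DW_TAG_compile_unit` typically comes first in the abbreviation table (i.e. it has abbreviation code 1), this commit adds a fast-path function (`tryExtractCUAbbrevFast`) which attempts to read only the first abbreviation, without constructing a full `DWARFAbbreviationDeclarationSet`. If the DIE's abbreviation code is not 1, or the first declaration in the table cannot be extracted or does not have code 1, `extractFast` falls back to parsing the full set via `getAbbreviations`.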

This significantly speeds up `ld64.lld`'s generation of `N_OSO` stab information (which needs `DW_AT_name` from the Compilation Unit DIE). The following measurement was taken on an M1 Mac Mini linking Chromium with full debug info:

  x: before
  +: after

      N           Min           Max        Median           Avg        Stddev
  x  15      3.136759      4.390569     3.5234511     3.6028554    0.38726359
  +  15     2.7222703     3.5872169      3.237128     3.1830136    0.31002649
  Difference at 95.0% confidence
      -0.419842 +/- 0.26232
      -11.653% +/- 7.28088%
      (Student's t, pooled s = 0.350777)

---
Full diff: https://github.com/llvm/llvm-project/pull/108757.diff


5 Files Affected:

- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h (+4) 
- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h (+4) 
- (modified) llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp (+18) 
- (modified) llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp (+35-22) 
- (modified) llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp (+11) 


``````````diff
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 6439827ef70f0f..18555bafdc1f01 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -62,6 +62,7 @@ class DWARFDebugAbbrev {
   mutable DWARFAbbreviationDeclarationSetMap AbbrDeclSets;
   mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos;
   mutable std::optional<DataExtractor> Data;
+  mutable std::map<uint64_t, DWARFAbbreviationDeclaration> CUAbbrevs;
 
 public:
   DWARFDebugAbbrev(DataExtractor Data);
@@ -69,6 +70,9 @@ class DWARFDebugAbbrev {
   Expected<const DWARFAbbreviationDeclarationSet *>
   getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
 
+  Expected<const DWARFAbbreviationDeclaration *>
+  tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const;
+
   void dump(raw_ostream &OS) const;
   Error parse() const;
 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea893123..87f8742fd9d9f0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -419,6 +419,10 @@ class DWARFUnit {
 
   uint64_t getAbbreviationsOffset() const { return Header.getAbbrOffset(); }
 
+  /// Extracts only the abbreviation declaration with code 1, which is
+  /// typically the compile unit DIE (DW_TAG_compile_unit).
+  const DWARFAbbreviationDeclaration *tryExtractCUAbbrevFast() const;
+
   const DWARFAbbreviationDeclarationSet *getAbbreviations() const;
 
   static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 85959ecc5e17f1..7944fc881e6bd1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -168,3 +168,21 @@ DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
           .first;
   return &PrevAbbrOffsetPos->second;
 }
+
+Expected<const DWARFAbbreviationDeclaration *>
+DWARFDebugAbbrev::tryExtractCUAbbrevFast(uint64_t CUAbbrOffset) const {
+  if (auto AbbrevDecl = CUAbbrevs.find(CUAbbrOffset);
+      AbbrevDecl != CUAbbrevs.end())
+    return &AbbrevDecl->second;
+
+  DWARFAbbreviationDeclaration Decl;
+  uint64_t Offset = CUAbbrOffset;
+  Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
+      Decl.extract(*Data, &Offset);
+  if (!ES)
+    return ES.takeError();
+  if (Decl.getCode() != 1)
+    return nullptr;
+
+  return &(CUAbbrevs[CUAbbrOffset] = std::move(Decl));
+}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 4f0a6d96ace9e2..030faad13f46f6 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -34,36 +34,49 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
     return false;
   }
   assert(DebugInfoData.isValidOffset(UEndOffset - 1));
+  AbbrevDecl = nullptr;
+
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
   if (0 == AbbrCode) {
     // NULL debug tag entry.
-    AbbrevDecl = nullptr;
     return true;
   }
-  const auto *AbbrevSet = U.getAbbreviations();
-  if (!AbbrevSet) {
-    U.getContext().getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF unit at offset 0x%8.8" PRIx64 " "
-                          "contains invalid abbreviation set offset 0x%" PRIx64,
-                          U.getOffset(), U.getAbbreviationsOffset()));
-    // Restore the original offset.
-    *OffsetPtr = Offset;
-    return false;
+
+  // Fast path: parsing the entire abbreviation table is wasteful if we only
+  // need the unit DIE (typically AbbrCode == 1).
+  if (1 == AbbrCode) {
+    AbbrevDecl = U.tryExtractCUAbbrevFast();
+    assert(!AbbrevDecl || AbbrevDecl->getCode() == AbbrCode);
   }
-  AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
   if (!AbbrevDecl) {
-    U.getContext().getWarningHandler()(
-        createStringError(errc::invalid_argument,
-                          "DWARF unit at offset 0x%8.8" PRIx64 " "
-                          "contains invalid abbreviation %" PRIu64 " at "
-                          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
-                          U.getOffset(), AbbrCode, *OffsetPtr,
-                          AbbrevSet->getCodeRange().c_str()));
-    // Restore the original offset.
-    *OffsetPtr = Offset;
-    return false;
+    const auto *AbbrevSet = U.getAbbreviations();
+    if (!AbbrevSet) {
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit at offset 0x%8.8" PRIx64 " "
+          "contains invalid abbreviation set offset 0x%" PRIx64,
+          U.getOffset(), U.getAbbreviationsOffset()));
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
+    AbbrevDecl = AbbrevSet->getAbbreviationDeclaration(AbbrCode);
+
+    if (!AbbrevDecl) {
+      U.getContext().getWarningHandler()(createStringError(
+          errc::invalid_argument,
+          "DWARF unit at offset 0x%8.8" PRIx64 " "
+          "contains invalid abbreviation %" PRIu64 " at "
+          "offset 0x%8.8" PRIx64 ", valid abbreviations are %s",
+          U.getOffset(), AbbrCode, *OffsetPtr,
+          AbbrevSet->getCodeRange().c_str()));
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
   }
+
   // See if all attributes in this DIE have fixed byte sizes. If so, we can
   // just add this size to the offset to skip to the next DIE.
   if (std::optional<size_t> FixedSize =
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557bd..dcf323525b10ee 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1051,6 +1051,17 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
   return nullptr;
 }
 
+const DWARFAbbreviationDeclaration *DWARFUnit::tryExtractCUAbbrevFast() const {
+  Expected<const DWARFAbbreviationDeclaration *> AbbrevOrError =
+      Abbrev->tryExtractCUAbbrevFast(getAbbreviationsOffset());
+  if (!AbbrevOrError) {
+    // FIXME: We should propagate this error upwards.
+    consumeError(AbbrevOrError.takeError());
+    return nullptr;
+  }
+  return *AbbrevOrError;
+}
+
 const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
   if (!Abbrevs) {
     Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError =

``````````

</details>


https://github.com/llvm/llvm-project/pull/108757


More information about the llvm-commits mailing list