[llvm] [BOLT][DWARF] Sort GDBIndexTUEntryVector (PR #101264)

Sayhaan Siddiqui via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 30 16:55:48 PDT 2024


https://github.com/sayhaan created https://github.com/llvm/llvm-project/pull/101264

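Sorts GDBIndexTUEntryVector in decreasing order of TypeHash (the type signature) before the types CU list of the .gdb_index section is rewritten, so that the order of the emitted entries stays deterministic when processing is parallelized.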

>From 9afce89392bac3656f5a5d52548c110a1bdd2a2b Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Tue, 30 Jul 2024 14:24:00 -0700
Subject: [PATCH 1/4] [BOLT][DWARF][NFC] Split DIEBuilder::finish

---
 bolt/include/bolt/Core/DIEBuilder.h | 11 +++--
 bolt/lib/Core/DIEBuilder.cpp        | 64 +++++++++++++++++++++--------
 bolt/lib/Rewrite/DWARFRewriter.cpp  | 22 +++++++---
 3 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h
index 0b840c142ed81..4895d384e3c92 100644
--- a/bolt/include/bolt/Core/DIEBuilder.h
+++ b/bolt/include/bolt/Core/DIEBuilder.h
@@ -207,9 +207,11 @@ class DIEBuilder {
   /// Along with current CU, and DIE being processed and the new DIE offset to
   /// be updated, it takes in Parents vector that can be empty if this DIE has
   /// no parents.
-  uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die,
-                        std::optional<BOLTDWARF5AccelTableData *> Parent,
-                        uint32_t NumberParentsInChain, uint32_t &CurOffset);
+  uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset);
+
+  void populateDebugNamesTable(DWARFUnit &CU, DIE &Die,
+                               std::optional<BOLTDWARF5AccelTableData *> Parent,
+                               uint32_t NumberParentsInChain);
 
   void registerUnit(DWARFUnit &DU, bool NeedSort);
 
@@ -338,6 +340,9 @@ class DIEBuilder {
   /// Finish current DIE construction.
   void finish();
 
+  /// Update debug names table.
+  void updateDebugNamesTable();
+
   // Interface to edit DIE
   template <class T> T *allocateDIEValue() {
     return new (getState().DIEAlloc) T;
diff --git a/bolt/lib/Core/DIEBuilder.cpp b/bolt/lib/Core/DIEBuilder.cpp
index 8f6195f6b6ea1..4f0a97cd31b74 100644
--- a/bolt/lib/Core/DIEBuilder.cpp
+++ b/bolt/lib/Core/DIEBuilder.cpp
@@ -461,17 +461,11 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx,
   return nullptr;
 }
 
-uint32_t
-DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
-                         std::optional<BOLTDWARF5AccelTableData *> Parent,
-                         uint32_t NumberParentsInChain, uint32_t &CurOffset) {
+uint32_t DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
+                                  uint32_t &CurOffset) {
   getState().DWARFDieAddressesParsed.erase(Die.getOffset());
   uint32_t CurSize = 0;
   Die.setOffset(CurOffset);
-  std::optional<BOLTDWARF5AccelTableData *> NameEntry =
-      DebugNamesTable.addAccelTableEntry(
-          CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt,
-          NumberParentsInChain, Parent);
   // It is possible that an indexed debugging information entry has a parent
   // that is not indexed (for example, if its parent does not have a name
   // attribute). In such a case, a parent attribute may point to a nameless
@@ -485,18 +479,13 @@ DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
   // If Parent is nullopt and NumberParentsInChain is not zero, then forward
   // declaration was encountered in this DF traversal. Propagating nullopt for
   // Parent to children.
-  if (!Parent && NumberParentsInChain)
-    NameEntry = std::nullopt;
-  if (NameEntry)
-    ++NumberParentsInChain;
   for (DIEValue &Val : Die.values())
     CurSize += Val.sizeOf(CU.getFormParams());
   CurSize += getULEB128Size(Die.getAbbrevNumber());
   CurOffset += CurSize;
 
   for (DIE &Child : Die.children()) {
-    uint32_t ChildSize =
-        finalizeDIEs(CU, Child, NameEntry, NumberParentsInChain, CurOffset);
+    uint32_t ChildSize = finalizeDIEs(CU, Child, CurOffset);
     CurSize += ChildSize;
   }
   // for children end mark.
@@ -514,10 +503,9 @@ void DIEBuilder::finish() {
     DIE *UnitDIE = getUnitDIEbyUnit(CU);
     uint32_t HeaderSize = CU.getHeaderSize();
     uint32_t CurOffset = HeaderSize;
-    DebugNamesTable.setCurrentUnit(CU, UnitStartOffset);
     std::vector<std::optional<BOLTDWARF5AccelTableData *>> Parents;
     Parents.push_back(std::nullopt);
-    finalizeDIEs(CU, *UnitDIE, std::nullopt, 0, CurOffset);
+    finalizeDIEs(CU, *UnitDIE, CurOffset);
 
     DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU);
     CurUnitInfo.UnitOffset = UnitStartOffset;
@@ -535,11 +523,12 @@ void DIEBuilder::finish() {
     finalizeCU(*CU, TypeUnitStartOffset);
   }
 
+  uint64_t UnitSizes = UnitSize;
   for (DWARFUnit *CU : getState().DUList) {
     // Skipping DWARF4 types.
     if (CU->getVersion() < 5 && CU->isTypeUnit())
       continue;
-    finalizeCU(*CU, UnitSize);
+    finalizeCU(*CU, UnitSizes);
   }
   if (opts::Verbosity >= 1) {
     if (!getState().DWARFDieAddressesParsed.empty())
@@ -548,6 +537,47 @@ void DIEBuilder::finish() {
       dbgs() << Twine::utohexstr(Address) << "\n";
     }
   }
+}
+
+void DIEBuilder::populateDebugNamesTable(
+    DWARFUnit &CU, DIE &Die, std::optional<BOLTDWARF5AccelTableData *> Parent,
+    uint32_t NumberParentsInChain) {
+  std::optional<BOLTDWARF5AccelTableData *> NameEntry =
+      DebugNamesTable.addAccelTableEntry(
+          CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt,
+          NumberParentsInChain, Parent);
+  if (!Parent && NumberParentsInChain)
+    NameEntry = std::nullopt;
+  if (NameEntry)
+    ++NumberParentsInChain;
+
+  for (DIE &Child : Die.children())
+    populateDebugNamesTable(CU, Child, NameEntry, NumberParentsInChain);
+}
+
+void DIEBuilder::updateDebugNamesTable() {
+  auto finalizeDebugNamesTableForCU = [&](DWARFUnit &CU,
+                                          uint64_t &UnitStartOffset) -> void {
+    DIE *UnitDIE = getUnitDIEbyUnit(CU);
+    DebugNamesTable.setCurrentUnit(CU, UnitStartOffset);
+    populateDebugNamesTable(CU, *UnitDIE, std::nullopt, 0);
+
+    DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU);
+    UnitStartOffset += CurUnitInfo.UnitLength;
+  };
+
+  uint64_t TypeUnitStartOffset = 0;
+  for (DWARFUnit *CU : getState().DUList) {
+    if (!(CU->getVersion() < 5 && CU->isTypeUnit()))
+      break;
+    finalizeDebugNamesTableForCU(*CU, TypeUnitStartOffset);
+  }
+
+  for (DWARFUnit *CU : getState().DUList) {
+    if (CU->getVersion() < 5 && CU->isTypeUnit())
+      continue;
+    finalizeDebugNamesTableForCU(*CU, UnitSize);
+  }
   updateReferences();
 }
 
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index beef1a8f902ad..f20aec81eb259 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -673,9 +673,8 @@ void DWARFRewriter::updateDebugInfo() {
                             DebugRangesSectionWriter &TempRangesSectionWriter,
                             DebugAddrWriter &AddressWriter,
                             const std::string &DWOName,
-                            const std::optional<std::string> &DwarfOutputPath) {
-    DIEBuilder DWODIEBuilder(BC, &(SplitCU).getContext(), DebugNamesTable,
-                             &Unit);
+                            const std::optional<std::string> &DwarfOutputPath,
+                            DIEBuilder &DWODIEBuilder) {
     DWODIEBuilder.buildDWOUnit(SplitCU);
     DebugStrOffsetsWriter DWOStrOffstsWriter(BC);
     DebugStrWriter DWOStrWriter((SplitCU).getContext(), true);
@@ -740,6 +739,7 @@ void DWARFRewriter::updateDebugInfo() {
       finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection);
 
   CUPartitionVector PartVec = partitionCUs(*BC.DwCtx);
+  llvm::DenseMap<uint64_t, std::unique_ptr<DIEBuilder>> DWODIEBuildersByCU;
   for (std::vector<DWARFUnit *> &Vec : PartVec) {
     DIEBlder.buildCompileUnits(Vec);
     for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) {
@@ -761,13 +761,23 @@ void DWARFRewriter::updateDebugInfo() {
               : std::optional<std::string>(opts::DwarfOutputPath.c_str());
       std::string DWOName = DIEBlder.updateDWONameCompDir(
           *StrOffstsWriter, *StrWriter, *CU, DwarfOutputPath, std::nullopt);
+      auto DWODIEBuilderPtr = std::make_unique<DIEBuilder>(
+          BC, &(**SplitCU).getContext(), DebugNamesTable, CU);
+      DWODIEBuildersByCU[CU->getOffset()] = std::move(DWODIEBuilderPtr);
+      DIEBuilder &DWODIEBuilder = *DWODIEBuildersByCU[CU->getOffset()].get();
       if (CU->getVersion() >= 5)
         StrOffstsWriter->finalizeSection(*CU, DIEBlder);
       processSplitCU(*CU, **SplitCU, DIEBlder, *TempRangesSectionWriter,
-                     AddressWriter, DWOName, DwarfOutputPath);
+                     AddressWriter, DWOName, DwarfOutputPath, DWODIEBuilder);
     }
-    for (DWARFUnit *CU : DIEBlder.getProcessedCUs())
+    for (DWARFUnit *CU : DIEBlder.getProcessedCUs()) {
+      auto DWODIEBuilderIterator = DWODIEBuildersByCU.find(CU->getOffset());
+      if (DWODIEBuilderIterator != DWODIEBuildersByCU.end()) {
+        DIEBuilder &DWODIEBuilder = *DWODIEBuilderIterator->second.get();
+        DWODIEBuilder.updateDebugNamesTable();
+      }
       processMainBinaryCU(*CU, DIEBlder);
+    }
     finalizeCompileUnits(DIEBlder, *Streamer, OffsetMap,
                          DIEBlder.getProcessedCUs(), *FinalAddrWriter);
   }
@@ -1468,6 +1478,7 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
   // generate and populate abbrevs here
   DIEBlder.generateAbbrevs();
   DIEBlder.finish();
+  DIEBlder.updateDebugNamesTable();
   SmallVector<char, 20> OutBuffer;
   std::shared_ptr<raw_svector_ostream> ObjOS =
       std::make_shared<raw_svector_ostream>(OutBuffer);
@@ -1672,6 +1683,7 @@ void DWARFRewriter::finalizeCompileUnits(DIEBuilder &DIEBlder,
   }
   DIEBlder.generateAbbrevs();
   DIEBlder.finish();
+  DIEBlder.updateDebugNamesTable();
   // generate debug_info and CUMap
   for (DWARFUnit *CU : CUs) {
     emitUnit(DIEBlder, Streamer, *CU);

>From 6206814b2e6892fab323c56af52251ccc1d5b57d Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Tue, 30 Jul 2024 14:33:07 -0700
Subject: [PATCH 2/4] Update comments

---
 bolt/include/bolt/Core/DIEBuilder.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h
index 4895d384e3c92..3d19d198d4dd7 100644
--- a/bolt/include/bolt/Core/DIEBuilder.h
+++ b/bolt/include/bolt/Core/DIEBuilder.h
@@ -203,12 +203,13 @@ class DIEBuilder {
   /// Update references once the layout is finalized.
   void updateReferences();
 
-  /// Update the Offset and Size of DIE, populate DebugNames table.
+  /// Update the Offset and Size of DIE.
   /// Along with current CU, and DIE being processed and the new DIE offset to
   /// be updated, it takes in Parents vector that can be empty if this DIE has
   /// no parents.
   uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset);
 
+  /// Populates DebugNames table
   void populateDebugNamesTable(DWARFUnit &CU, DIE &Die,
                                std::optional<BOLTDWARF5AccelTableData *> Parent,
                                uint32_t NumberParentsInChain);

>From 3a734cd6d2034376965fa3f2f3b9ce1a85afabee Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Tue, 30 Jul 2024 14:44:32 -0700
Subject: [PATCH 3/4] Formatting change

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:


Differential Revision: https://phabricator.intern.facebook.com/D60474903
---
 bolt/include/bolt/Core/DIEBuilder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/include/bolt/Core/DIEBuilder.h b/bolt/include/bolt/Core/DIEBuilder.h
index 3d19d198d4dd7..5d565e8cec20b 100644
--- a/bolt/include/bolt/Core/DIEBuilder.h
+++ b/bolt/include/bolt/Core/DIEBuilder.h
@@ -209,7 +209,7 @@ class DIEBuilder {
   /// no parents.
   uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die, uint32_t &CurOffset);
 
-  /// Populates DebugNames table
+  /// Populates DebugNames table.
   void populateDebugNamesTable(DWARFUnit &CU, DIE &Die,
                                std::optional<BOLTDWARF5AccelTableData *> Parent,
                                uint32_t NumberParentsInChain);

>From e4bcea4635ea1f61f6fe3c1536889a550ddf14df Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Tue, 30 Jul 2024 16:54:38 -0700
Subject: [PATCH 4/4] [BOLT][DWARF] Sort GDBIndexTUEntryVector

---
 bolt/include/bolt/Core/GDBIndex.h                         | 8 ++++++++
 bolt/lib/Core/GDBIndex.cpp                                | 3 +--
 .../X86/dwarf4-split-gdb-index-types-gdb-generated.test   | 8 ++++----
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/bolt/include/bolt/Core/GDBIndex.h b/bolt/include/bolt/Core/GDBIndex.h
index 6604c2a11472d..0ebcf4ecfe99e 100644
--- a/bolt/include/bolt/Core/GDBIndex.h
+++ b/bolt/include/bolt/Core/GDBIndex.h
@@ -53,6 +53,14 @@ class GDBIndex {
   const GDBIndexTUEntryType &getGDBIndexTUEntryVector() const {
     return GDBIndexTUEntryVector;
   }
+
+  /// Sorts entries in GDBIndexTUEntryVector according to the TypeHash.
+  void sortGDBIndexTUEntryVector() {
+    llvm::stable_sort(GDBIndexTUEntryVector, [](const GDBIndexTUEntry &LHS,
+                                                const GDBIndexTUEntry &RHS) {
+      return LHS.TypeHash > RHS.TypeHash;
+    });
+  }
 };
 
 } // namespace bolt
diff --git a/bolt/lib/Core/GDBIndex.cpp b/bolt/lib/Core/GDBIndex.cpp
index 9e6d24167d559..c7fb4889646b4 100644
--- a/bolt/lib/Core/GDBIndex.cpp
+++ b/bolt/lib/Core/GDBIndex.cpp
@@ -23,7 +23,6 @@ void GDBIndex::updateGdbIndexSection(
     DebugARangesSectionWriter &ARangesSectionWriter) {
   if (!BC.getGdbIndexSection())
     return;
-
   // See https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
   // for .gdb_index section format.
 
@@ -141,7 +140,7 @@ void GDBIndex::updateGdbIndexSection(
     write64le(Buffer + 8, CUInfo.second.Length + 4);
     Buffer += 16;
   }
-
+  sortGDBIndexTUEntryVector();
   // Rewrite TU CU List, since abbrevs can be different.
   // Entry example:
   // 0: offset = 0x00000000, type_offset = 0x0000001e, type_signature =
diff --git a/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test b/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
index c9b12574caa3a..6caf5870fca02 100644
--- a/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
+++ b/bolt/test/X86/dwarf4-split-gdb-index-types-gdb-generated.test
@@ -17,10 +17,10 @@
 # POSTCHECK-NEXT:         0: Offset = 0x0, Length = 0x34
 # POSTCHECK-NEXT:         1: Offset = 0x34, Length = 0x34
 # POSTCHECK:          Types CU list offset = 0x38, has 4 entries
-# POSTCHECK-NEXT:       0: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x675d23e4f33235f2
-# POSTCHECK-NEXT:       1: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0x49dc260088be7e56
-# POSTCHECK-NEXT:       2: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x104ec427d2ebea6f
-# POSTCHECK-NEXT:       3: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0xb4580bc1535df1e4
+# POSTCHECK-NEXT:       0: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0xb4580bc1535df1e4
+# POSTCHECK-NEXT:       1: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x675d23e4f33235f2
+# POSTCHECK-NEXT:       2: offset = 0x0000004a, type_offset = 0x0000001e, type_signature = 0x49dc260088be7e56
+# POSTCHECK-NEXT:       3: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x104ec427d2ebea6f
 # POSTCHECK:          Address area offset = 0x98, has 2 entries
 # POSTCHECK-NEXT:         Low/High address = [0x[[#%.4x,ADDR:]],
 # POSTCHECK-SAME:           0x[[#ADDR + 0x7a]]) (Size: 0x7a), CU id = 0



More information about the llvm-commits mailing list