[llvm] Reduce llvm-gsymutil memory usage (PR #91023)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 09:14:19 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-debuginfo

Author: Kevin Frei (kevinfrei)

<details>
<summary>Changes</summary>

llvm-gsymutil eats a lot of RAM. On some large binaries, it causes OOMs on smaller hardware, consuming well over 64GB of RAM. This change frees line tables once we're done with them, and frees each DWARFUnit's DIEs when we finish processing that DWARFUnit (DU), though they may get reconstituted if there are references from other DUs during processing. Once the conversion is complete, all DIEs are freed. These changes reduced peak memory usage by 7-12% in my tests.

I tested the double-checked locking around the creation & freeing of the data structures on a 166-core system, validating that it trivially malfunctioned without the locks (or with the locks naively reordered) and worked reliably with them.

---
Full diff: https://github.com/llvm/llvm-project/pull/91023.diff


3 Files Affected:

- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h (+7-3) 
- (modified) llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp (+25-7) 
- (modified) llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (+14-1) 


``````````diff
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea893123..9614aab8bb9b50 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -22,6 +22,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/RWMutex.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -257,6 +258,9 @@ class DWARFUnit {
 
   std::shared_ptr<DWARFUnit> DWO;
 
+  mutable llvm::sys::RWMutex m_cu_die_array_mutex;
+  mutable llvm::sys::RWMutex m_all_die_array_mutex;
+
 protected:
   friend dwarf_linker::parallel::CompileUnit;
 
@@ -566,6 +570,9 @@ class DWARFUnit {
 
   Error tryExtractDIEsIfNeeded(bool CUDieOnly);
 
+  /// clearDIEs - Clear parsed DIEs to keep memory usage low.
+  void clearDIEs(bool KeepCUDie);
+
 private:
   /// Size in bytes of the .debug_info data associated with this compile unit.
   size_t getDebugInfoSize() const {
@@ -581,9 +588,6 @@ class DWARFUnit {
   void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
                            std::vector<DWARFDebugInfoEntry> &DIEs) const;
 
-  /// clearDIEs - Clear parsed DIEs to keep memory usage low.
-  void clearDIEs(bool KeepCUDie);
-
   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
   /// it was actually constructed.
   /// The \p AlternativeLocation specifies an alternative location to get
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557bd..cc79d9ec7130c9 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -496,13 +496,29 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
 }
 
 Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
-  if ((CUDieOnly && !DieArray.empty()) ||
-      DieArray.size() > 1)
-    return Error::success(); // Already parsed.
-
-  bool HasCUDie = !DieArray.empty();
-  extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
-
+  {
+    llvm::sys::ScopedReader Lock(m_cu_die_array_mutex);
+    if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
+      return Error::success(); // Already parsed.
+  }
+  bool HasCUDie = false;
+  {
+    llvm::sys::ScopedWriter Lock(m_cu_die_array_mutex);
+    if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
+      return Error::success(); // Already parsed.
+    HasCUDie = !DieArray.empty();
+    extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
+  }
+  {
+    llvm::sys::ScopedReader Lock(m_all_die_array_mutex);
+    if (DieArray.empty())
+      return Error::success();
+
+    // If CU DIE was just parsed, copy several attribute values from it.
+    if (HasCUDie)
+      return Error::success();
+  }
+  llvm::sys::ScopedWriter Lock(m_all_die_array_mutex);
   if (DieArray.empty())
     return Error::success();
 
@@ -658,6 +674,8 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
   // It depends on the implementation whether the request is fulfilled.
   // Create a new vector with a small capacity and assign it to the DieArray to
   // have previous contents freed.
+  llvm::sys::ScopedWriter CULock(m_cu_die_array_mutex);
+  llvm::sys::ScopedWriter AllLock(m_all_die_array_mutex);
   DieArray = (KeepCUDie && !DieArray.empty())
                  ? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
                  : std::vector<DWARFDebugInfoEntry>();
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 601686fdd3dd51..4a1ed91a7244f2 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -587,6 +587,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
       DWARFDie Die = getDie(*CU);
       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
       handleDie(Out, CUI, Die);
+      // Release the line table, once we're done.
+      DICtx.clearLineTableForUnit(CU.get());
+      // Free any DIEs that were allocated by the DWARF parser.
+      // If/when they're needed by other CU's, they'll be recreated.
+      CU->clearDIEs(false);
     }
   } else {
     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -612,11 +617,16 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
       DWARFDie Die = getDie(*CU);
       if (Die) {
         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
-        pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
+        pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
           std::string storage;
           raw_string_ostream StrStream(storage);
           OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
           handleDie(ThreadOut, CUI, Die);
+          // Release the line table once we're done.
+          DICtx.clearLineTableForUnit(CU.get());
+          // Free any DIEs that were allocated by the DWARF parser.
+          // If/when they're needed by other CU's, they'll be recreated.
+          CU->clearDIEs(false);
           // Print ThreadLogStorage lines into an actual stream under a lock
           std::lock_guard<std::mutex> guard(LogMutex);
           if (Out.GetOS()) {
@@ -629,6 +639,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
     }
     pool.wait();
   }
+  // Now get rid of all the DIEs that may have been recreated
+  for (const auto &CU : DICtx.compile_units())
+    CU->clearDIEs(false);
   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
   Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
   return Error::success();

``````````

</details>


https://github.com/llvm/llvm-project/pull/91023


More information about the llvm-commits mailing list