[llvm] 60cd3eb - Reduce llvm-gsymutil memory usage (#91023)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 2 10:14:29 PDT 2024


Author: Kevin Frei
Date: 2024-07-02T10:14:26-07:00
New Revision: 60cd3eb880fe48d192a58c64a1e38e875fc65377

URL: https://github.com/llvm/llvm-project/commit/60cd3eb880fe48d192a58c64a1e38e875fc65377
DIFF: https://github.com/llvm/llvm-project/commit/60cd3eb880fe48d192a58c64a1e38e875fc65377.diff

LOG: Reduce llvm-gsymutil memory usage (#91023)

llvm-gsymutil eats a lot of RAM. On some large binaries, it causes OOM's on smaller hardware, consuming well over 64GB of RAM. This change frees line tables once we're done with them, and frees DWARFUnits's DIE's when we finish processing each DU, though they may get reconstituted if there are references from other DU's during processing. Once the conversion is complete, all DIE's are freed. The reduction in peak memory usage from these changes showed between 7-12% in my tests.

The double-checked locking around the creation & freeing of the data structures was tested on a 166 core system. I validated that it trivially malfunctioned without the locks (and with stupid reordering of the locks) and worked reliably with them.

---------

Co-authored-by: Kevin Frei <freik at meta.com>

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
    llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
    llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 80c27aea89312..26ef7db718dd5 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -22,6 +22,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/RWMutex.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -257,6 +258,10 @@ class DWARFUnit {
 
   std::shared_ptr<DWARFUnit> DWO;
 
+  mutable llvm::sys::RWMutex FreeDIEsMutex;
+  mutable llvm::sys::RWMutex ExtractCUDieMutex;
+  mutable llvm::sys::RWMutex ExtractNonCUDIEsMutex;
+
 protected:
   friend dwarf_linker::parallel::CompileUnit;
 
@@ -566,6 +571,9 @@ class DWARFUnit {
 
   Error tryExtractDIEsIfNeeded(bool CUDieOnly);
 
+  /// clearDIEs - Clear parsed DIEs to keep memory usage low.
+  void clearDIEs(bool KeepCUDie);
+
 private:
   /// Size in bytes of the .debug_info data associated with this compile unit.
   size_t getDebugInfoSize() const {
@@ -577,13 +585,22 @@ class DWARFUnit {
   /// hasn't already been done
   void extractDIEsIfNeeded(bool CUDieOnly);
 
+  /// extracCUDieIfNeeded - Parse CU DIE if it hasn't already been done.
+  /// Only to be used from extractDIEsIfNeeded, which holds the correct locks.
+  bool extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie);
+
+  /// extractNonCUDIEsIfNeeded - Parses non-CU DIE's for a given CU if needed.
+  /// Only to be used from extractDIEsIfNeeded, which holds the correct locks.
+  Error extractNonCUDIEsIfNeeded(bool HasCUDie);
+
+  /// extractNonCUDIEsHelper - helper to be invoked *only* from inside
+  /// tryExtractDIEsIfNeeded, which holds the correct locks.
+  Error extractNonCUDIEsHelper();
+
   /// extractDIEsToVector - Appends all parsed DIEs to a vector.
   void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
                            std::vector<DWARFDebugInfoEntry> &DIEs) const;
 
-  /// clearDIEs - Clear parsed DIEs to keep memory usage low.
-  void clearDIEs(bool KeepCUDie);
-
   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
   /// it was actually constructed.
   /// The \p AlternativeLocation specifies an alternative location to get

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index bdd04b00f557b..2760cef7edfdb 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -495,21 +495,78 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
     Context.getRecoverableErrorHandler()(std::move(e));
 }
 
-Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
-  if ((CUDieOnly && !DieArray.empty()) ||
-      DieArray.size() > 1)
-    return Error::success(); // Already parsed.
+static bool DoubleCheckedRWLocker(llvm::sys::RWMutex &Mutex,
+                                  const std::function<bool()> &reader,
+                                  const std::function<void()> &writer) {
+  {
+    llvm::sys::ScopedReader Lock(Mutex);
+    if (reader())
+      return true;
+  }
+  llvm::sys::ScopedWriter Lock(Mutex);
+  if (reader())
+    return true;
+  // If we get here, then the reader function returned false. This means that
+  // no one else is currently writing to this data structure and it's safe for
+  // us to write to it now. The scoped writer lock guarantees there are no
+  // other readers or writers at this point.
+  writer();
+  return false;
+}
 
-  bool HasCUDie = !DieArray.empty();
-  extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
+// Helper to safely check if the Compile-Unit DIE has been extracted already.
+// If not, then extract it, and return false, indicating that it was *not*
+// already extracted.
+bool DWARFUnit::extractCUDieIfNeeded(bool CUDieOnly, bool &HasCUDie) {
+  return DoubleCheckedRWLocker(
+      ExtractCUDieMutex,
+      // Calculate if the CU DIE has been extracted already.
+      [&]() {
+        return ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1);
+      },
+      // Lambda to extract the CU DIE.
+      [&]() {
+        HasCUDie = !DieArray.empty();
+        extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
+      });
+}
 
-  if (DieArray.empty())
-    return Error::success();
+// Helper to safely check if the non-Compile-Unit DIEs have been parsed
+// already. If they haven't been parsed, go ahead and parse them.
+Error DWARFUnit::extractNonCUDIEsIfNeeded(bool HasCUDie) {
+  Error Result = Error::success();
+  DoubleCheckedRWLocker(
+      ExtractNonCUDIEsMutex,
+      // Lambda to check if all DIEs have been extracted already.
+      [=]() { return (DieArray.empty() || HasCUDie); },
+      // Lambda to extract all the DIEs using the helper function
+      [&]() {
+        if (Error E = extractNonCUDIEsHelper()) {
+          // Consume the success placeholder and save the actual error
+          consumeError(std::move(Result));
+          Result = std::move(E);
+        }
+      });
+  return Result;
+}
 
-  // If CU DIE was just parsed, copy several attribute values from it.
-  if (HasCUDie)
+Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
+  // Acquire the FreeDIEsMutex lock (in read-mode) to prevent the Compile Unit
+  // DIE from being freed by a thread calling clearDIEs() after the CU DIE was
+  // parsed, but before the rest of the DIEs are parsed, as there are no other
+  // locks held during that brief period.
+  llvm::sys::ScopedReader FreeLock(FreeDIEsMutex);
+  bool HasCUDie = false;
+  if (extractCUDieIfNeeded(CUDieOnly, HasCUDie))
     return Error::success();
+  // Right here is where the above-mentioned race condition exists.
+  return extractNonCUDIEsIfNeeded(HasCUDie);
+}
 
+// Helper used from the tryExtractDIEsIfNeeded function: it must already have
+// acquired the ExtractNonCUDIEsMutex for writing.
+Error DWARFUnit::extractNonCUDIEsHelper() {
+  // If CU DIE was just parsed, copy several attribute values from it.
   DWARFDie UnitDie(this, &DieArray[0]);
   if (std::optional<uint64_t> DWOId =
           toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
@@ -653,6 +710,10 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
 }
 
 void DWARFUnit::clearDIEs(bool KeepCUDie) {
+  // We need to acquire the FreeDIEsMutex lock in write-mode, because we are
+  // going to free the DIEs, when other threads might be trying to create them.
+  llvm::sys::ScopedWriter FreeLock(FreeDIEsMutex);
+
   // Do not use resize() + shrink_to_fit() to free memory occupied by dies.
   // shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
   // It depends on the implementation whether the request is fulfilled.

diff  --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 601686fdd3dd5..e1b30648b6a77 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -587,6 +587,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
       DWARFDie Die = getDie(*CU);
       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
       handleDie(Out, CUI, Die);
+      // Release the line table, once we're done.
+      DICtx.clearLineTableForUnit(CU.get());
+      // Free any DIEs that were allocated by the DWARF parser.
+      // If/when they're needed by other CU's, they'll be recreated.
+      CU->clearDIEs(/*KeepCUDie=*/false);
     }
   } else {
     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -612,11 +617,16 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
       DWARFDie Die = getDie(*CU);
       if (Die) {
         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
-        pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
+        pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
           std::string storage;
           raw_string_ostream StrStream(storage);
           OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
           handleDie(ThreadOut, CUI, Die);
+          // Release the line table once we're done.
+          DICtx.clearLineTableForUnit(CU.get());
+          // Free any DIEs that were allocated by the DWARF parser.
+          // If/when they're needed by other CU's, they'll be recreated.
+          CU->clearDIEs(/*KeepCUDie=*/false);
           // Print ThreadLogStorage lines into an actual stream under a lock
           std::lock_guard<std::mutex> guard(LogMutex);
           if (Out.GetOS()) {
@@ -629,6 +639,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
     }
     pool.wait();
   }
+  // Now get rid of all the DIEs that may have been recreated
+  for (const auto &CU : DICtx.compile_units())
+    CU->clearDIEs(/*KeepCUDie=*/false);
   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
   Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
   return Error::success();


        


More information about the llvm-commits mailing list