[llvm] r341568 - [dsymutil] Reduce peak memory usage for the single threaded execution.

Jonas Devlieghere via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 6 10:31:59 PDT 2018


Author: jdevlieghere
Date: Thu Sep  6 10:31:59 2018
New Revision: 341568

URL: http://llvm.org/viewvc/llvm-project?rev=341568&view=rev
Log:
[dsymutil] Reduce peak memory usage for the single threaded execution.

Keeping the compile units in memory is expensive. We allocate them in
the analyze step and deallocate them again once we've finished cloning.
This posed a problem in the single-threaded case, where we did all the
analysis first, followed by all the cloning: the link contexts for
every object file were in memory at the same time right after the
analysis finished.

This patch changes the way we order work in the single-threaded case.
Instead of running all the analysis followed by all the cloning, we now
interleave the two per object file, so we can deallocate the memory as
soon as a file is processed. The result is binary identical and peak
memory usage went down from 13.43GB to 5.73GB for a debug build of
trunk clang.

Differential revision: https://reviews.llvm.org/D51618

Modified:
    llvm/trunk/tools/dsymutil/DwarfLinker.cpp

Modified: llvm/trunk/tools/dsymutil/DwarfLinker.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/dsymutil/DwarfLinker.cpp?rev=341568&r1=341567&r2=341568&view=diff
==============================================================================
--- llvm/trunk/tools/dsymutil/DwarfLinker.cpp (original)
+++ llvm/trunk/tools/dsymutil/DwarfLinker.cpp Thu Sep  6 10:31:59 2018
@@ -2483,50 +2483,41 @@ bool DwarfLinker::link(const DebugMap &M
   std::condition_variable ProcessedFilesConditionVariable;
   BitVector ProcessedFiles(NumObjects, false);
 
-  // Now do analyzeContextInfo in parallel as it is particularly expensive.
-  auto AnalyzeLambda = [&]() {
-    for (unsigned i = 0, e = NumObjects; i != e; ++i) {
-      auto &LinkContext = ObjectContexts[i];
-
-      if (!LinkContext.ObjectFile || !LinkContext.DwarfContext) {
-        std::unique_lock<std::mutex> LockGuard(ProcessedFilesMutex);
-        ProcessedFiles.set(i);
-        ProcessedFilesConditionVariable.notify_one();
-        continue;
-      }
-
-      for (const auto &CU : LinkContext.DwarfContext->compile_units()) {
-        updateDwarfVersion(CU->getVersion());
-        // The !registerModuleReference() condition effectively skips
-        // over fully resolved skeleton units. This second pass of
-        // registerModuleReferences doesn't do any new work, but it
-        // will collect top-level errors, which are suppressed. Module
-        // warnings were already displayed in the first iteration.
-        bool Quiet = true;
-        auto CUDie = CU->getUnitDIE(false);
-        if (!CUDie || LLVM_UNLIKELY(Options.Update) ||
-            !registerModuleReference(CUDie, *CU, ModuleMap, LinkContext.DMO,
-                                     LinkContext.Ranges, OffsetsStringPool,
-                                     UniquingStringPool, ODRContexts, UnitID,
-                                     Quiet)) {
-          LinkContext.CompileUnits.push_back(llvm::make_unique<CompileUnit>(
-              *CU, UnitID++, !Options.NoODR && !Options.Update, ""));
-        }
-      }
-      
-      // Now build the DIE parent links that we will use during the next phase.
-      for (auto &CurrentUnit : LinkContext.CompileUnits) {
-        auto CUDie = CurrentUnit->getOrigUnit().getUnitDIE();
-        if (!CUDie)
-          continue;
-        analyzeContextInfo(CurrentUnit->getOrigUnit().getUnitDIE(), 0,
-                           *CurrentUnit, &ODRContexts.getRoot(),
-                           UniquingStringPool, ODRContexts);
+  //  Analyzing the context info is particularly expensive so it is executed in
+  //  parallel with emitting the previous compile unit.
+  auto AnalyzeLambda = [&](size_t i) {
+    auto &LinkContext = ObjectContexts[i];
+
+    if (!LinkContext.ObjectFile || !LinkContext.DwarfContext)
+      return;
+
+    for (const auto &CU : LinkContext.DwarfContext->compile_units()) {
+      updateDwarfVersion(CU->getVersion());
+      // The !registerModuleReference() condition effectively skips
+      // over fully resolved skeleton units. This second pass of
+      // registerModuleReferences doesn't do any new work, but it
+      // will collect top-level errors, which are suppressed. Module
+      // warnings were already displayed in the first iteration.
+      bool Quiet = true;
+      auto CUDie = CU->getUnitDIE(false);
+      if (!CUDie || LLVM_UNLIKELY(Options.Update) ||
+          !registerModuleReference(CUDie, *CU, ModuleMap, LinkContext.DMO,
+                                   LinkContext.Ranges, OffsetsStringPool,
+                                   UniquingStringPool, ODRContexts, UnitID,
+                                   Quiet)) {
+        LinkContext.CompileUnits.push_back(llvm::make_unique<CompileUnit>(
+            *CU, UnitID++, !Options.NoODR && !Options.Update, ""));
       }
+    }
 
-      std::unique_lock<std::mutex> LockGuard(ProcessedFilesMutex);
-      ProcessedFiles.set(i);
-      ProcessedFilesConditionVariable.notify_one();
+    // Now build the DIE parent links that we will use during the next phase.
+    for (auto &CurrentUnit : LinkContext.CompileUnits) {
+      auto CUDie = CurrentUnit->getOrigUnit().getUnitDIE();
+      if (!CUDie)
+        continue;
+      analyzeContextInfo(CurrentUnit->getOrigUnit().getUnitDIE(), 0,
+                         *CurrentUnit, &ODRContexts.getRoot(),
+                         UniquingStringPool, ODRContexts);
     }
   };
 
@@ -2534,57 +2525,48 @@ bool DwarfLinker::link(const DebugMap &M
   // Note, although this loop runs in serial, it can run in parallel with
   // the analyzeContextInfo loop so long as we process files with indices >=
   // than those processed by analyzeContextInfo.
-  auto CloneLambda = [&]() {
-    for (unsigned i = 0, e = NumObjects; i != e; ++i) {
-      {
-        std::unique_lock<std::mutex> LockGuard(ProcessedFilesMutex);
-        if (!ProcessedFiles[i]) {
-          ProcessedFilesConditionVariable.wait(
-              LockGuard, [&]() { return ProcessedFiles[i]; });
-        }
-      }
-
-      auto &LinkContext = ObjectContexts[i];
-      if (!LinkContext.ObjectFile)
-        continue;
-
-      // Then mark all the DIEs that need to be present in the linked output
-      // and collect some information about them.
-      // Note that this loop can not be merged with the previous one because
-      // cross-cu references require the ParentIdx to be setup for every CU in
-      // the object file before calling this.
-      if (LLVM_UNLIKELY(Options.Update)) {
-        for (auto &CurrentUnit : LinkContext.CompileUnits)
-          CurrentUnit->markEverythingAsKept();
-        Streamer->copyInvariantDebugSection(*LinkContext.ObjectFile);
-      } else {
-        for (auto &CurrentUnit : LinkContext.CompileUnits)
-          lookForDIEsToKeep(LinkContext.RelocMgr, LinkContext.Ranges,
-                            LinkContext.CompileUnits,
-                            CurrentUnit->getOrigUnit().getUnitDIE(),
-                            LinkContext.DMO, *CurrentUnit, 0);
-      }
+  auto CloneLambda = [&](size_t i) {
+    auto &LinkContext = ObjectContexts[i];
+    if (!LinkContext.ObjectFile)
+      return;
+
+    // Then mark all the DIEs that need to be present in the linked output
+    // and collect some information about them.
+    // Note that this loop can not be merged with the previous one because
+    // cross-cu references require the ParentIdx to be setup for every CU in
+    // the object file before calling this.
+    if (LLVM_UNLIKELY(Options.Update)) {
+      for (auto &CurrentUnit : LinkContext.CompileUnits)
+        CurrentUnit->markEverythingAsKept();
+      Streamer->copyInvariantDebugSection(*LinkContext.ObjectFile);
+    } else {
+      for (auto &CurrentUnit : LinkContext.CompileUnits)
+        lookForDIEsToKeep(LinkContext.RelocMgr, LinkContext.Ranges,
+                          LinkContext.CompileUnits,
+                          CurrentUnit->getOrigUnit().getUnitDIE(),
+                          LinkContext.DMO, *CurrentUnit, 0);
+    }
 
-      // The calls to applyValidRelocs inside cloneDIE will walk the reloc
-      // array again (in the same way findValidRelocsInDebugInfo() did). We
-      // need to reset the NextValidReloc index to the beginning.
-      LinkContext.RelocMgr.resetValidRelocs();
-      if (LinkContext.RelocMgr.hasValidRelocs() ||
-          LLVM_UNLIKELY(Options.Update))
-        DIECloner(*this, LinkContext.RelocMgr, DIEAlloc,
-                  LinkContext.CompileUnits, Options)
-            .cloneAllCompileUnits(*LinkContext.DwarfContext, LinkContext.DMO,
-                                  LinkContext.Ranges, OffsetsStringPool);
-      if (!Options.NoOutput && !LinkContext.CompileUnits.empty() &&
-          LLVM_LIKELY(!Options.Update))
-        patchFrameInfoForObject(
-            LinkContext.DMO, LinkContext.Ranges, *LinkContext.DwarfContext,
-            LinkContext.CompileUnits[0]->getOrigUnit().getAddressByteSize());
+    // The calls to applyValidRelocs inside cloneDIE will walk the reloc
+    // array again (in the same way findValidRelocsInDebugInfo() did). We
+    // need to reset the NextValidReloc index to the beginning.
+    LinkContext.RelocMgr.resetValidRelocs();
+    if (LinkContext.RelocMgr.hasValidRelocs() || LLVM_UNLIKELY(Options.Update))
+      DIECloner(*this, LinkContext.RelocMgr, DIEAlloc, LinkContext.CompileUnits,
+                Options)
+          .cloneAllCompileUnits(*LinkContext.DwarfContext, LinkContext.DMO,
+                                LinkContext.Ranges, OffsetsStringPool);
+    if (!Options.NoOutput && !LinkContext.CompileUnits.empty() &&
+        LLVM_LIKELY(!Options.Update))
+      patchFrameInfoForObject(
+          LinkContext.DMO, LinkContext.Ranges, *LinkContext.DwarfContext,
+          LinkContext.CompileUnits[0]->getOrigUnit().getAddressByteSize());
 
-      // Clean-up before starting working on the next object.
-      endDebugObject(LinkContext);
-    }
+    // Clean-up before starting working on the next object.
+    endDebugObject(LinkContext);
+  };
 
+  auto EmitLambda = [&]() {
     // Emit everything that's global.
     if (!Options.NoOutput) {
       Streamer->emitAbbrevs(Abbreviations, MaxDwarfVersion);
@@ -2606,16 +2588,44 @@ bool DwarfLinker::link(const DebugMap &M
     }
   };
 
-  // FIXME: The DwarfLinker can have some very deep recursion that can max
-  // out the (significantly smaller) stack when using threads. We don't
-  // want this limitation when we only have a single thread.
+  auto AnalyzeAll = [&]() {
+    for (unsigned i = 0, e = NumObjects; i != e; ++i) {
+      AnalyzeLambda(i);
+
+      std::unique_lock<std::mutex> LockGuard(ProcessedFilesMutex);
+      ProcessedFiles.set(i);
+      ProcessedFilesConditionVariable.notify_one();
+    }
+  };
+
+  auto CloneAll = [&]() {
+    for (unsigned i = 0, e = NumObjects; i != e; ++i) {
+      {
+        std::unique_lock<std::mutex> LockGuard(ProcessedFilesMutex);
+        if (!ProcessedFiles[i]) {
+          ProcessedFilesConditionVariable.wait(
+              LockGuard, [&]() { return ProcessedFiles[i]; });
+        }
+      }
+
+      CloneLambda(i);
+    }
+    EmitLambda();
+  };
+
+  // To limit memory usage in the single threaded case, analyze and clone are
+  // run sequentially so the LinkContext is freed after processing each object
+  // in endDebugObject.
   if (Options.Threads == 1) {
-    AnalyzeLambda();
-    CloneLambda();
+    for (unsigned i = 0, e = NumObjects; i != e; ++i) {
+      AnalyzeLambda(i);
+      CloneLambda(i);
+    }
+    EmitLambda();
   } else {
     ThreadPool pool(2);
-    pool.async(AnalyzeLambda);
-    pool.async(CloneLambda);
+    pool.async(AnalyzeAll);
+    pool.async(CloneAll);
     pool.wait();
   }
 




More information about the llvm-commits mailing list