[lld] d17b092 - [lld-macho] Make writing map file asynchronous

Wed Dec 15 16:41:27 PST 2021

Author: Vincent Lee
Date: 2021-12-15T16:37:04-08:00
New Revision: d17b092fe690e98bc366a2341e050e4fd616e810

URL: https://github.com/llvm/llvm-project/commit/d17b092fe690e98bc366a2341e050e4fd616e810
DIFF: https://github.com/llvm/llvm-project/commit/d17b092fe690e98bc366a2341e050e4fd616e810.diff

LOG: [lld-macho] Make writing map file asynchronous

For large applications that emit a map file, writing the map file can take quite
a bit of time. When the contributors to link time are ranked, writing the map
file comes in 2nd place, behind only loading the input files. Moving map file
writing off the critical path (by running it on its own thread) saves ~2-3
seconds when linking the Chromium framework on a 16-core Intel Xeon W.

```
           base            diff            difference (95% CI)
sys_time   1.617 ± 0.034   1.657 ± 0.026   [  +1.5% ..   +3.5%]
user_time  28.536 ± 0.245  28.609 ± 0.180  [  -0.1% ..   +0.7%]
wall_time  23.833 ± 0.271  21.684 ± 0.194  [  -9.5% ..   -8.5%]
samples    31              24
```

Reviewed By: #lld-macho, oontvoo, int3

Differential Revision: https://reviews.llvm.org/D115416

Added: 
    

Modified: 
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 093a380d175e5..8903f0189ef91 100644

--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 
@@ -64,6 +65,7 @@ class Writer {
 
   template <class LP> void run();
 
+  ThreadPool threadPool;
   std::unique_ptr<FileOutputBuffer> &buffer;
   uint64_t addr = 0;
   uint64_t fileOff = 0;
@@ -1035,10 +1037,14 @@ void Writer::finalizeLinkEditSegment() {
       dataInCodeSection,
       functionStartsSection,
   };
-  parallelForEach(linkEditSections, [](LinkEditSection *osec) {
+  SmallVector<std::shared_future<void>> threadFutures;
+  threadFutures.reserve(linkEditSections.size());
+  for (LinkEditSection *osec : linkEditSections)
     if (osec)
-      osec->finalizeContents();
-  });
+      threadFutures.emplace_back(threadPool.async(
+          [](LinkEditSection *osec) { osec->finalizeContents(); }, osec));
+  for (std::shared_future<void> &future : threadFutures)
+    future.wait();
 
   // Now that __LINKEDIT is filled out, do a proper calculation of its
   // addresses and offsets.
@@ -1091,14 +1097,21 @@ void Writer::writeSections() {
 // values.
 void Writer::writeUuid() {
   TimeTraceScope timeScope("Computing UUID");
+
   ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
   unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
   // Round-up integer division
   size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
   std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
   std::vector<uint64_t> hashes(chunks.size());
-  parallelForEachN(0, chunks.size(),
-                   [&](size_t i) { hashes[i] = xxHash64(chunks[i]); });
+  SmallVector<std::shared_future<void>> threadFutures;
+  threadFutures.reserve(chunks.size());
+  for (size_t i = 0; i < chunks.size(); ++i)
+    threadFutures.emplace_back(threadPool.async(
+        [&](size_t i) { hashes[i] = xxHash64(chunks[i]); }, i));
+  for (std::shared_future<void> &future : threadFutures)
+    future.wait();
+
   uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
                               hashes.size() * sizeof(uint64_t)});
   uuidCommand->writeUuid(digest);
@@ -1147,8 +1160,8 @@ template <class LP> void Writer::run() {
   sortSegmentsAndSections();
   createLoadCommands<LP>();
   finalizeAddresses();
+  threadPool.async(writeMapFile);
   finalizeLinkEditSegment();
-  writeMapFile();
   writeOutputFile();
 }