[lld] 95d21f6 - [lld-macho] Reduce memory usage of printing thunks in map file (#122785)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 15 22:58:27 PST 2025


Author: alx32
Date: 2025-01-15T22:58:24-08:00
New Revision: 95d21f6015241f1fbf36e495f101080bdcee8cd4

URL: https://github.com/llvm/llvm-project/commit/95d21f6015241f1fbf36e495f101080bdcee8cd4
DIFF: https://github.com/llvm/llvm-project/commit/95d21f6015241f1fbf36e495f101080bdcee8cd4.diff

LOG: [lld-macho] Reduce memory usage of printing thunks in map file (#122785)

This commit improves the memory efficiency of the lld-macho linker by
optimizing how thunks are printed in the map file. Previously, merging
vectors of input sections required creating a temporary vector, which
increased memory usage and in some cases caused the linker to run out of
memory as reported in comments on
https://github.com/llvm/llvm-project/pull/120496. The new approach
interleaves the printing of two arrays of ConcatInputSection in sorted
order without allocating additional memory for a merged array.

Added: 
    

Modified: 
    lld/MachO/MapFile.cpp
    lld/test/MachO/arm64-thunks.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 12417df8cecb8c..8919c8d2f9b9c9 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -161,20 +161,6 @@ static uint64_t getSymSizeForMap(Defined *sym) {
   return sym->size;
 }
 
-// Merges two vectors of input sections in order of their outSecOff values.
-// This approach creates a new (temporary) vector which is not ideal but the
-// ideal approach leads to a lot of code duplication.
-static std::vector<ConcatInputSection *>
-mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
-                   ArrayRef<ConcatInputSection *> inputs2) {
-  std::vector<ConcatInputSection *> vec(inputs1.size() + inputs2.size());
-  std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
-             vec.begin(), [](ConcatInputSection *a, ConcatInputSection *b) {
-               return a->outSecOff < b->outSecOff;
-             });
-  return vec;
-}
-
 void macho::writeMapFile() {
   if (config->mapFile.empty())
     return;
@@ -217,15 +203,32 @@ void macho::writeMapFile() {
                    seg->name.str().c_str(), osec->name.str().c_str());
     }
 
-  // Shared function to print an array of symbols.
-  auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
-    for (const ConcatInputSection *isec : arr) {
-      for (Defined *sym : isec->symbols) {
-        if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
-          os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
-                       getSymSizeForMap(sym),
-                       readerToFileOrdinal[sym->getFile()],
-                       sym->getName().str().data());
+  // Helper lambda that prints all symbols from one ConcatInputSection.
+  auto printOne = [&](const ConcatInputSection *isec) {
+    for (Defined *sym : isec->symbols) {
+      if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)) {
+        os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
+                     getSymSizeForMap(sym),
+                     readerToFileOrdinal.lookup(sym->getFile()),
+                     sym->getName().str().data());
+      }
+    }
+  };
+  // Shared function to print one or two arrays of ConcatInputSection in
+  // ascending outSecOff order. The second array is optional; if provided, we
+  // interleave the printing in sorted order without allocating a merged temp
+  // array.
+  auto printIsecArrSyms = [&](ArrayRef<ConcatInputSection *> arr1,
+                              ArrayRef<ConcatInputSection *> arr2 = {}) {
+    // Print both arrays in sorted order, interleaving as necessary.
+    while (!arr1.empty() || !arr2.empty()) {
+      if (!arr1.empty() && (arr2.empty() || arr1.front()->outSecOff <=
+                                                arr2.front()->outSecOff)) {
+        printOne(arr1.front());
+        arr1 = arr1.drop_front();
+      } else if (!arr2.empty()) {
+        printOne(arr2.front());
+        arr2 = arr2.drop_front();
       }
     }
   };
@@ -235,9 +238,7 @@ void macho::writeMapFile() {
   for (const OutputSegment *seg : outputSegments) {
     for (const OutputSection *osec : seg->getSections()) {
       if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
-        auto inputsAndThunks =
-            mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
-        printIsecArrSyms(inputsAndThunks);
+        printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
       } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
       } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {

diff  --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index 858a27dfe36af5..76c7d108104d13 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -17,13 +17,7 @@
 # RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
 # RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
 
-## Check that the thunks appear in the map file and that everything is sorted by address
-# Because of the `.space` instructions, there will end up being a lot of dead symbols in the 
-# linker map (linker map will be ~2.7GB). So to avoid the test trying to (slowly) match regex
-# across all the ~2.7GB of the linker map - generate a version of the linker map without dead symbols.
-# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map
-
-# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
+# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
  
 # MAP:      0x{{[[:xdigit:]]+}} {{.*}} _b
 # MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
@@ -339,7 +333,12 @@ _main:
   ret
 
 .section __TEXT,__cstring
-  .space 0x4000000
+  # The .space below has to be composed of non-zero characters. Otherwise, the
+  # linker will create a symbol for every '0' in the section, leading to
+  # dramatic memory usage and a huge linker map file
+  .space 0x4000000, 'A'
+  .byte 0
+
 
 .section __TEXT,__lcxx_override,regular,pure_instructions
 


        


More information about the llvm-commits mailing list