[lld] 95d21f6 - [lld-macho] Reduce memory usage of printing thunks in map file (#122785)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 15 22:58:27 PST 2025
Author: alx32
Date: 2025-01-15T22:58:24-08:00
New Revision: 95d21f6015241f1fbf36e495f101080bdcee8cd4
URL: https://github.com/llvm/llvm-project/commit/95d21f6015241f1fbf36e495f101080bdcee8cd4
DIFF: https://github.com/llvm/llvm-project/commit/95d21f6015241f1fbf36e495f101080bdcee8cd4.diff
LOG: [lld-macho] Reduce memory usage of printing thunks in map file (#122785)
This commit improves the memory efficiency of the lld-macho linker by
optimizing how thunks are printed in the map file. Previously, merging
vectors of input sections required creating a temporary vector, which
increased memory usage and in some cases caused the linker to run out of
memory as reported in comments on
https://github.com/llvm/llvm-project/pull/120496. The new approach
interleaves the printing of two arrays of ConcatInputSection in sorted
order without allocating additional memory for a merged array.
Added:
Modified:
lld/MachO/MapFile.cpp
lld/test/MachO/arm64-thunks.s
Removed:
################################################################################
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 12417df8cecb8c..8919c8d2f9b9c9 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -161,20 +161,6 @@ static uint64_t getSymSizeForMap(Defined *sym) {
return sym->size;
}
-// Merges two vectors of input sections in order of their outSecOff values.
-// This approach creates a new (temporary) vector which is not ideal but the
-// ideal approach leads to a lot of code duplication.
-static std::vector<ConcatInputSection *>
-mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
- ArrayRef<ConcatInputSection *> inputs2) {
- std::vector<ConcatInputSection *> vec(inputs1.size() + inputs2.size());
- std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
- vec.begin(), [](ConcatInputSection *a, ConcatInputSection *b) {
- return a->outSecOff < b->outSecOff;
- });
- return vec;
-}
-
void macho::writeMapFile() {
if (config->mapFile.empty())
return;
@@ -217,15 +203,32 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}
- // Shared function to print an array of symbols.
- auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
- for (const ConcatInputSection *isec : arr) {
- for (Defined *sym : isec->symbols) {
- if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
- os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
- getSymSizeForMap(sym),
- readerToFileOrdinal[sym->getFile()],
- sym->getName().str().data());
+ // Helper lambda that prints all symbols from one ConcatInputSection.
+ auto printOne = [&](const ConcatInputSection *isec) {
+ for (Defined *sym : isec->symbols) {
+ if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)) {
+ os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
+ getSymSizeForMap(sym),
+ readerToFileOrdinal.lookup(sym->getFile()),
+ sym->getName().str().data());
+ }
+ }
+ };
+ // Shared function to print one or two arrays of ConcatInputSection in
+ // ascending outSecOff order. The second array is optional; if provided, we
+ // interleave the printing in sorted order without allocating a merged temp
+ // array.
+ auto printIsecArrSyms = [&](ArrayRef<ConcatInputSection *> arr1,
+ ArrayRef<ConcatInputSection *> arr2 = {}) {
+ // Print both arrays in sorted order, interleaving as necessary.
+ while (!arr1.empty() || !arr2.empty()) {
+ if (!arr1.empty() && (arr2.empty() || arr1.front()->outSecOff <=
+ arr2.front()->outSecOff)) {
+ printOne(arr1.front());
+ arr1 = arr1.drop_front();
+ } else if (!arr2.empty()) {
+ printOne(arr2.front());
+ arr2 = arr2.drop_front();
}
}
};
@@ -235,9 +238,7 @@ void macho::writeMapFile() {
for (const OutputSegment *seg : outputSegments) {
for (const OutputSection *osec : seg->getSections()) {
if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
- auto inputsAndThunks =
- mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
- printIsecArrSyms(inputsAndThunks);
+ printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
printIsecArrSyms(concatOsec->inputs);
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
diff --git a/lld/test/MachO/arm64-thunks.s b/lld/test/MachO/arm64-thunks.s
index 858a27dfe36af5..76c7d108104d13 100644
--- a/lld/test/MachO/arm64-thunks.s
+++ b/lld/test/MachO/arm64-thunks.s
@@ -17,13 +17,7 @@
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
-## Check that the thunks appear in the map file and that everything is sorted by address
-# Because of the `.space` instructions, there will end up being a lot of dead symbols in the
-# linker map (linker map will be ~2.7GB). So to avoid the test trying to (slowly) match regex
-# across all the ~2.7GB of the linker map - generate a version of the linker map without dead symbols.
-# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map
-
-# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
+# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
@@ -339,7 +333,12 @@ _main:
ret
.section __TEXT,__cstring
- .space 0x4000000
+ # The .space below has to be composed of non-zero characters. Otherwise, the
+ # linker will create a symbol for every zero byte in the section, leading to
+ # dramatic memory usage and a huge linker map file.
+ .space 0x4000000, 'A'
+ .byte 0
+
.section __TEXT,__lcxx_override,regular,pure_instructions
More information about the llvm-commits mailing list