[lld] 213dbdb - [lld-macho] Overhaul map file code
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 8 13:34:19 PST 2022
Author: Jez Ng
Date: 2022-11-08T16:33:22-05:00
New Revision: 213dbdbef0bad835abca0753f9e59b17dc2bcde2
URL: https://github.com/llvm/llvm-project/commit/213dbdbef0bad835abca0753f9e59b17dc2bcde2
DIFF: https://github.com/llvm/llvm-project/commit/213dbdbef0bad835abca0753f9e59b17dc2bcde2.diff
LOG: [lld-macho] Overhaul map file code
The previous map file code was modeled after LLD-ELF's
implementation. However, ld64's map file differs quite a bit from
LLD-ELF's. I've revamped our map file implementation so it is better
able to emit ld64-style map files.
Notable differences:
* ld64 doesn't demangle symbols in map files, regardless of whether
`-demangle` is passed. So we don't have to bother with
`getSymbolStrings()`.
* ld64 doesn't emit symbols in cstring sections; it emits just the
literal values. Moreover, it emits these literal values regardless of
whether they are labeled with a symbol.
* ld64 emits map file entries for things that are not strictly symbols,
such as unwind info, GOT entries, etc. That isn't handled in this
diff, but this redesign makes them easy to implement.
Additionally, the previous implementation sorted the symbols so as to
emit them in address order. This was slow and unnecessary -- the symbols
can already be traversed in address order by walking the list of
OutputSections. This brings significant speedups. Here are the numbers
from the chromium_framework_less_dwarf benchmark on my Mac Pro, with the
`-map` argument added to the response file:
base diff difference (95% CI)
sys_time 2.922 ± 0.059 2.950 ± 0.085 [ -0.7% .. +2.5%]
user_time 11.464 ± 0.191 8.290 ± 0.123 [ -28.7% .. -26.7%]
wall_time 11.235 ± 0.175 9.184 ± 0.169 [ -19.3% .. -17.2%]
samples 16 23
(It's worth noting that map files are written in parallel with the
output binary, but they often took longer to write than the binary
itself.)
Finally, I did further cleanups to the map-file.s test -- there was no
real need to have a custom-named section. There were also alt_entry
symbol declarations that had no corresponding definition. Either way,
neither custom-named sections nor alt_entry symbols trigger special code
paths in our map file implementation.
Reviewed By: #lld-macho, Roger
Differential Revision: https://reviews.llvm.org/D137368
Added:
Modified:
lld/MachO/MapFile.cpp
lld/test/MachO/map-file.s
Removed:
################################################################################
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 8f1b6a13330a7..5d6c87baba9f1 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -6,9 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the -map option. It shows lists in order and
-// hierarchically the outputFile, arch, input files, output sections and
-// symbols:
+// This file implements the -map option, which maps address ranges to their
+// respective contents, plus the input file these contents were originally from.
+// The contents (typically symbols) are listed in address order. Dead-stripped
+// contents are included as well.
//
// # Path: test
// # Arch: x86_84
@@ -28,15 +29,16 @@
//===----------------------------------------------------------------------===//
#include "MapFile.h"
+#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
-#include "OutputSection.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
@@ -45,69 +47,75 @@ using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
+struct CStringInfo {
+ uint32_t fileIndex;
+ StringRef str;
+};
+
struct MapInfo {
SmallVector<InputFile *> files;
- SmallVector<Defined *> liveSymbols;
SmallVector<Defined *> deadSymbols;
+ DenseMap<const OutputSection *,
+ SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
+ liveCStringsForSection;
+ SmallVector<CStringInfo> deadCStrings;
};
static MapInfo gatherMapInfo() {
MapInfo info;
for (InputFile *file : inputFiles)
if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
- bool hasEmittedSymbol = false;
+ uint32_t fileIndex = info.files.size() + 1;
+ bool isReferencedFile = false;
+
+ // Gather the dead symbols. We don't have to bother with the live ones
+ // because we will pick them up as we iterate over the OutputSections
+ // later.
for (Symbol *sym : file->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym))
- if (d->isec && d->getFile() == file) {
- if (d->isLive()) {
- assert(!shouldOmitFromOutput(d->isec));
- info.liveSymbols.push_back(d);
- } else {
+ // Only emit the prevailing definition of a symbol. Also, don't emit
+ // the symbol if it is part of a cstring section (we use the literal
+ // value instead, similar to ld64)
+ if (d->isec && d->getFile() == file &&
+ !isa<CStringInputSection>(d->isec)) {
+ isReferencedFile = true;
+ if (!d->isLive())
info.deadSymbols.push_back(d);
+ }
+ }
+
+ // Gather all the cstrings (both live and dead). A CString(Output)Section
+ // doesn't provide us a way of figuring out which InputSections its
+ // cstring contents came from, so we need to build up that mapping here.
+ for (const Section *sec : file->sections) {
+ for (const Subsection &subsec : sec->subsections) {
+ if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
+ auto &liveCStrings = info.liveCStringsForSection[isec->parent];
+ for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+ if (piece.live)
+ liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
+ {fileIndex, isec->getStringRef(i)}});
+ else
+ info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
+ isReferencedFile = true;
}
- hasEmittedSymbol = true;
+ } else {
+ break;
}
+ }
}
- if (hasEmittedSymbol)
- info.files.push_back(file);
- }
- parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
- [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
- return info;
-}
-// Construct a map from symbols to their stringified representations.
-// Demangling symbols (which is what toString() does) is slow, so
-// we do that in batch using parallel-for.
-static DenseMap<Symbol *, std::string>
-getSymbolStrings(ArrayRef<Defined *> syms) {
- std::vector<std::string> str(syms.size());
- parallelFor(0, syms.size(), [&](size_t i) {
- raw_string_ostream os(str[i]);
- Defined *sym = syms[i];
-
- switch (sym->isec->kind()) {
- case InputSection::CStringLiteralKind: {
- // Output "literal string: <string literal>"
- const auto *isec = cast<CStringInputSection>(sym->isec);
- const StringPiece &piece = isec->getStringPiece(sym->value);
- assert(
- sym->value == piece.inSecOff &&
- "We expect symbols to always point to the start of a StringPiece.");
- StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
- (os << "literal string: ").write_escaped(str);
- break;
- }
- case InputSection::ConcatKind:
- case InputSection::WordLiteralKind:
- os << toString(*sym);
+ if (isReferencedFile)
+ info.files.push_back(file);
}
- });
- DenseMap<Symbol *, std::string> ret;
- for (size_t i = 0, e = syms.size(); i < e; ++i)
- ret[syms[i]] = std::move(str[i]);
- return ret;
+ // cstrings are not stored in sorted order in their OutputSections, so we sort
+ // them here.
+ for (auto &liveCStrings : info.liveCStringsForSection)
+ parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
+ return p1.first < p2.first;
+ });
+ return info;
}
void macho::writeMapFile() {
@@ -124,16 +132,12 @@ void macho::writeMapFile() {
return;
}
- // Dump output path.
os << format("# Path: %s\n", config->outputFile.str().c_str());
-
- // Dump output architecture.
os << format("# Arch: %s\n",
getArchitectureName(config->arch()).str().c_str());
MapInfo info = gatherMapInfo();
- // Dump table of object files.
os << "# Object files:\n";
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
uint32_t fileIndex = 1;
@@ -143,7 +147,6 @@ void macho::writeMapFile() {
readerToFileOrdinal[file] = fileIndex++;
}
- // Dump table of sections
os << "# Sections:\n";
os << "# Address\tSize \tSegment\tSection\n";
for (OutputSegment *seg : outputSegments)
@@ -155,28 +158,48 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}
- // Dump table of symbols
- DenseMap<Symbol *, std::string> liveSymbolStrings =
- getSymbolStrings(info.liveSymbols);
os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
- for (Defined *sym : info.liveSymbols) {
- assert(sym->isLive());
- os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
- readerToFileOrdinal[sym->getFile()],
- liveSymbolStrings[sym].c_str());
+ for (const OutputSegment *seg : outputSegments) {
+ for (const OutputSection *osec : seg->getSections()) {
+ if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
+ for (const InputSection *isec : concatOsec->inputs) {
+ for (Defined *sym : isec->symbols)
+ os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
+ sym->size, readerToFileOrdinal[sym->getFile()],
+ sym->getName().str().data());
+ }
+ } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+ const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
+ uint64_t lastAddr = 0; // strings will never start at address 0, so this
+ // is a sentinel value
+ for (const auto &[addr, info] : liveCStrings) {
+ uint64_t size = 0;
+ if (addr != lastAddr)
+ size = info.str.size() + 1; // include null terminator
+ lastAddr = addr;
+ os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
+ info.fileIndex);
+ os.write_escaped(info.str) << "\n";
+ }
+ }
+ // TODO print other synthetic sections
+ }
}
if (config->deadStrip) {
- DenseMap<Symbol *, std::string> deadSymbolStrings =
- getSymbolStrings(info.deadSymbols);
os << "# Dead Stripped Symbols:\n";
os << "# \tSize \tFile Name\n";
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
readerToFileOrdinal[sym->getFile()],
- deadSymbolStrings[sym].c_str());
+ sym->getName().str().data());
+ }
+ for (CStringInfo &cstrInfo : info.deadCStrings) {
+ os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
+ cstrInfo.str.size() + 1, cstrInfo.fileIndex);
+ os.write_escaped(cstrInfo.str) << "\n";
}
}
}
diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
index ac5ae9d02074c..fe1ef88604e1e 100644
--- a/lld/test/MachO/map-file.s
+++ b/lld/test/MachO/map-file.s
@@ -4,23 +4,24 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
-# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
+# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
+# RUN: --time-trace -o %t/test
# RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
-# RUN: cat %t/objdump %t/map > %t/out
-# RUN: FileCheck %s < %t/out
+## Check that symbols in cstring sections aren't emitted
+# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
# RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
# CHECK: Sections:
-# CHECK-NEXT: Idx Name Size VMA Type
-# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
-# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT
-# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
-# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
+# CHECK-NEXT: Idx Name Size VMA Type
+# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
+# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
+# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
# CHECK: SYMBOL TABLE:
# CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main
# CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number
-# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo
+# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar
+# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE
# CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world
# CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me
@@ -35,43 +36,50 @@
# CHECK-NEXT: # Sections:
# CHECK-NEXT: # Address Size Segment Section
# CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text
-# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj
# CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring
# CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common
# CHECK-NEXT: # Symbols:
-# CHECK-NEXT: # Address Size File Name
-# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
-# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo
-# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
-# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
-# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
+# CHECK-NEXT: # Address Size File Name
+# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
+# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar
+# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE
+# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
+# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
+# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n
+# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
# MAPFILE: "name":"Total Write map file"
-# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
+# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
# RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
## C-string literals should be printed as "literal string: <C string literal>"
# STRIPPED-LABEL: Dead Stripped Symbols:
-# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] _foo
-# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
-# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
-# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _bar
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] __ZTIN3foo3bar4MethE
+# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
+# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
+# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
# RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
# RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
+## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
+## folded symbols but not folded cstrings; we print both.
+
# ICF: Symbols:
-# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo
-# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
+# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE
+# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
+# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n
+# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n
#--- foo.s
-## ICF will only fold sections marked as pure_instructions
-.section __TEXT,obj,regular,pure_instructions
-.globl _foo
-.alt_entry _alt_foo
-_foo:
+.globl __ZTIN3foo3bar4MethE
+## This C++ symbol makes it clear that we do not print the demangled name in
+## the map file, even if `-demangle` is passed.
+__ZTIN3foo3bar4MethE:
nop
.subsections_via_symbols
@@ -79,12 +87,10 @@ _foo:
#--- test.s
.comm _number, 1
.globl _main, _bar
-.alt_entry _alt_bar
_main:
ret
-.section __TEXT,obj,regular,pure_instructions
_bar:
nop
@@ -101,4 +107,6 @@ _hello_world:
_hello_its_me:
.asciz "Hello, it's me"
+.asciz "Hello world!\n"
+
.subsections_via_symbols
More information about the llvm-commits
mailing list