[lld] 7ca32bd - Reland "[lld-macho] Overhaul map file code"
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 13:57:53 PST 2022
Author: Jez Ng
Date: 2022-12-05T16:57:35-05:00
New Revision: 7ca32bd402ddc31ace69d0a85362240c961b4f1d
URL: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d
DIFF: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d.diff
LOG: Reland "[lld-macho] Overhaul map file code"
This reverts commit 38d6202a425462ce5923d038bc54532115a80a1f.
Differential Revision: https://reviews.llvm.org/D137368
Added:
Modified:
lld/MachO/MapFile.cpp
lld/test/MachO/map-file.s
Removed:
################################################################################
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 8f1b6a13330a..5d6c87baba9f 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -6,9 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the -map option. It shows lists in order and
-// hierarchically the outputFile, arch, input files, output sections and
-// symbols:
+// This file implements the -map option, which maps address ranges to their
+// respective contents, plus the input file these contents were originally from.
+// The contents (typically symbols) are listed in address order. Dead-stripped
+// contents are included as well.
//
// # Path: test
// # Arch: x86_64
@@ -28,15 +29,16 @@
//===----------------------------------------------------------------------===//
#include "MapFile.h"
+#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
-#include "OutputSection.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
@@ -45,69 +47,75 @@ using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
+struct CStringInfo {
+ uint32_t fileIndex;
+ StringRef str;
+};
+
struct MapInfo {
SmallVector<InputFile *> files;
- SmallVector<Defined *> liveSymbols;
SmallVector<Defined *> deadSymbols;
+ DenseMap<const OutputSection *,
+ SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
+ liveCStringsForSection;
+ SmallVector<CStringInfo> deadCStrings;
};
static MapInfo gatherMapInfo() {
MapInfo info;
for (InputFile *file : inputFiles)
if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
- bool hasEmittedSymbol = false;
+ uint32_t fileIndex = info.files.size() + 1;
+ bool isReferencedFile = false;
+
+ // Gather the dead symbols. We don't have to bother with the live ones
+ // because we will pick them up as we iterate over the OutputSections
+ // later.
for (Symbol *sym : file->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym))
- if (d->isec && d->getFile() == file) {
- if (d->isLive()) {
- assert(!shouldOmitFromOutput(d->isec));
- info.liveSymbols.push_back(d);
- } else {
+ // Only emit the prevailing definition of a symbol. Also, don't emit
+ // the symbol if it is part of a cstring section (we use the literal
+ // value instead, similar to ld64)
+ if (d->isec && d->getFile() == file &&
+ !isa<CStringInputSection>(d->isec)) {
+ isReferencedFile = true;
+ if (!d->isLive())
info.deadSymbols.push_back(d);
+ }
+ }
+
+ // Gather all the cstrings (both live and dead). A CString(Output)Section
+ // doesn't provide us a way of figuring out which InputSections its
+ // cstring contents came from, so we need to build up that mapping here.
+ for (const Section *sec : file->sections) {
+ for (const Subsection &subsec : sec->subsections) {
+ if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
+ auto &liveCStrings = info.liveCStringsForSection[isec->parent];
+ for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
+ if (piece.live)
+ liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
+ {fileIndex, isec->getStringRef(i)}});
+ else
+ info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
+ isReferencedFile = true;
}
- hasEmittedSymbol = true;
+ } else {
+ break;
}
+ }
}
- if (hasEmittedSymbol)
- info.files.push_back(file);
- }
- parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
- [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
- return info;
-}
-// Construct a map from symbols to their stringified representations.
-// Demangling symbols (which is what toString() does) is slow, so
-// we do that in batch using parallel-for.
-static DenseMap<Symbol *, std::string>
-getSymbolStrings(ArrayRef<Defined *> syms) {
- std::vector<std::string> str(syms.size());
- parallelFor(0, syms.size(), [&](size_t i) {
- raw_string_ostream os(str[i]);
- Defined *sym = syms[i];
-
- switch (sym->isec->kind()) {
- case InputSection::CStringLiteralKind: {
- // Output "literal string: <string literal>"
- const auto *isec = cast<CStringInputSection>(sym->isec);
- const StringPiece &piece = isec->getStringPiece(sym->value);
- assert(
- sym->value == piece.inSecOff &&
- "We expect symbols to always point to the start of a StringPiece.");
- StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
- (os << "literal string: ").write_escaped(str);
- break;
- }
- case InputSection::ConcatKind:
- case InputSection::WordLiteralKind:
- os << toString(*sym);
+ if (isReferencedFile)
+ info.files.push_back(file);
}
- });
- DenseMap<Symbol *, std::string> ret;
- for (size_t i = 0, e = syms.size(); i < e; ++i)
- ret[syms[i]] = std::move(str[i]);
- return ret;
+ // cstrings are not stored in sorted order in their OutputSections, so we sort
+ // them here.
+ for (auto &liveCStrings : info.liveCStringsForSection)
+ parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
+ return p1.first < p2.first;
+ });
+ return info;
}
void macho::writeMapFile() {
@@ -124,16 +132,12 @@ void macho::writeMapFile() {
return;
}
- // Dump output path.
os << format("# Path: %s\n", config->outputFile.str().c_str());
-
- // Dump output architecture.
os << format("# Arch: %s\n",
getArchitectureName(config->arch()).str().c_str());
MapInfo info = gatherMapInfo();
- // Dump table of object files.
os << "# Object files:\n";
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
uint32_t fileIndex = 1;
@@ -143,7 +147,6 @@ void macho::writeMapFile() {
readerToFileOrdinal[file] = fileIndex++;
}
- // Dump table of sections
os << "# Sections:\n";
os << "# Address\tSize \tSegment\tSection\n";
for (OutputSegment *seg : outputSegments)
@@ -155,28 +158,48 @@ void macho::writeMapFile() {
seg->name.str().c_str(), osec->name.str().c_str());
}
- // Dump table of symbols
- DenseMap<Symbol *, std::string> liveSymbolStrings =
- getSymbolStrings(info.liveSymbols);
os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
- for (Defined *sym : info.liveSymbols) {
- assert(sym->isLive());
- os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
- readerToFileOrdinal[sym->getFile()],
- liveSymbolStrings[sym].c_str());
+ for (const OutputSegment *seg : outputSegments) {
+ for (const OutputSection *osec : seg->getSections()) {
+ if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
+ for (const InputSection *isec : concatOsec->inputs) {
+ for (Defined *sym : isec->symbols)
+ os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
+ sym->size, readerToFileOrdinal[sym->getFile()],
+ sym->getName().str().data());
+ }
+ } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+ const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
+ uint64_t lastAddr = 0; // strings will never start at address 0, so this
+ // is a sentinel value
+ for (const auto &[addr, info] : liveCStrings) {
+ uint64_t size = 0;
+ if (addr != lastAddr)
+ size = info.str.size() + 1; // include null terminator
+ lastAddr = addr;
+ os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
+ info.fileIndex);
+ os.write_escaped(info.str) << "\n";
+ }
+ }
+ // TODO print other synthetic sections
+ }
}
if (config->deadStrip) {
- DenseMap<Symbol *, std::string> deadSymbolStrings =
- getSymbolStrings(info.deadSymbols);
os << "# Dead Stripped Symbols:\n";
os << "# \tSize \tFile Name\n";
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
readerToFileOrdinal[sym->getFile()],
- deadSymbolStrings[sym].c_str());
+ sym->getName().str().data());
+ }
+ for (CStringInfo &cstrInfo : info.deadCStrings) {
+ os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
+ cstrInfo.str.size() + 1, cstrInfo.fileIndex);
+ os.write_escaped(cstrInfo.str) << "\n";
}
}
}
diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
index ac5ae9d02074..fe1ef88604e1 100644
--- a/lld/test/MachO/map-file.s
+++ b/lld/test/MachO/map-file.s
@@ -4,23 +4,24 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
-# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
+# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
+# RUN: --time-trace -o %t/test
# RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
-# RUN: cat %t/objdump %t/map > %t/out
-# RUN: FileCheck %s < %t/out
+## Check that symbols in cstring sections aren't emitted
+# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
# RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
# CHECK: Sections:
-# CHECK-NEXT: Idx Name Size VMA Type
-# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
-# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT
-# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
-# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
+# CHECK-NEXT: Idx Name Size VMA Type
+# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
+# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
+# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
# CHECK: SYMBOL TABLE:
# CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main
# CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number
-# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo
+# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar
+# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE
# CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world
# CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me
@@ -35,43 +36,50 @@
# CHECK-NEXT: # Sections:
# CHECK-NEXT: # Address Size Segment Section
# CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text
-# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj
# CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring
# CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common
# CHECK-NEXT: # Symbols:
-# CHECK-NEXT: # Address Size File Name
-# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
-# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo
-# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
-# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
-# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
+# CHECK-NEXT: # Address Size File Name
+# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
+# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar
+# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE
+# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
+# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
+# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n
+# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
# MAPFILE: "name":"Total Write map file"
-# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
+# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
# RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
## C-string literals should be printed as "literal string: <C string literal>"
# STRIPPED-LABEL: Dead Stripped Symbols:
-# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] _foo
-# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
-# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
-# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _bar
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
+# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] __ZTIN3foo3bar4MethE
+# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
+# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
+# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
# RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
# RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
+## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
+## folded symbols but not folded cstrings; we print both.
+
# ICF: Symbols:
-# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo
-# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
+# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE
+# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
+# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n
+# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n
#--- foo.s
-## ICF will only fold sections marked as pure_instructions
-.section __TEXT,obj,regular,pure_instructions
-.globl _foo
-.alt_entry _alt_foo
-_foo:
+.globl __ZTIN3foo3bar4MethE
+## This C++ symbol makes it clear that we do not print the demangled name in
+## the map file, even if `-demangle` is passed.
+__ZTIN3foo3bar4MethE:
nop
.subsections_via_symbols
@@ -79,12 +87,10 @@ _foo:
#--- test.s
.comm _number, 1
.globl _main, _bar
-.alt_entry _alt_bar
_main:
ret
-.section __TEXT,obj,regular,pure_instructions
_bar:
nop
@@ -101,4 +107,6 @@ _hello_world:
_hello_its_me:
.asciz "Hello, it's me"
+.asciz "Hello world!\n"
+
.subsections_via_symbols
More information about the llvm-commits mailing list