[lld] 7ca32bd - Reland "[lld-macho] Overhaul map file code"
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 13:59:24 PST 2022
Reminder to please always mention the reason for the revert in the
commit message.
On Tue, Dec 6, 2022 at 12:57 AM Jez Ng via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Jez Ng
> Date: 2022-12-05T16:57:35-05:00
> New Revision: 7ca32bd402ddc31ace69d0a85362240c961b4f1d
>
> URL: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d
> DIFF: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d.diff
>
> LOG: Reland "[lld-macho] Overhaul map file code"
>
> This reverts commit 38d6202a425462ce5923d038bc54532115a80a1f.
>
> Differential Revision: https://reviews.llvm.org/D137368
>
> Added:
>
>
> Modified:
> lld/MachO/MapFile.cpp
> lld/test/MachO/map-file.s
>
> Removed:
>
>
>
> ################################################################################
> diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
> index 8f1b6a13330a..5d6c87baba9f 100644
> --- a/lld/MachO/MapFile.cpp
> +++ b/lld/MachO/MapFile.cpp
> @@ -6,9 +6,10 @@
> //
> //===----------------------------------------------------------------------===//
> //
> -// This file implements the -map option. It shows lists in order and
> -// hierarchically the outputFile, arch, input files, output sections and
> -// symbols:
> +// This file implements the -map option, which maps address ranges to their
> +// respective contents, plus the input file these contents were originally from.
> +// The contents (typically symbols) are listed in address order. Dead-stripped
> +// contents are included as well.
> //
> // # Path: test
> // # Arch: x86_64
> @@ -28,15 +29,16 @@
> //===----------------------------------------------------------------------===//
>
> #include "MapFile.h"
> +#include "ConcatOutputSection.h"
> #include "Config.h"
> #include "InputFiles.h"
> #include "InputSection.h"
> -#include "OutputSection.h"
> #include "OutputSegment.h"
> #include "Symbols.h"
> #include "SyntheticSections.h"
> #include "Target.h"
> #include "lld/Common/ErrorHandler.h"
> +#include "llvm/ADT/DenseMap.h"
> #include "llvm/Support/Parallel.h"
> #include "llvm/Support/TimeProfiler.h"
>
> @@ -45,69 +47,75 @@ using namespace llvm::sys;
> using namespace lld;
> using namespace lld::macho;
>
> +struct CStringInfo {
> + uint32_t fileIndex;
> + StringRef str;
> +};
> +
> struct MapInfo {
> SmallVector<InputFile *> files;
> - SmallVector<Defined *> liveSymbols;
> SmallVector<Defined *> deadSymbols;
> + DenseMap<const OutputSection *,
> + SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
> + liveCStringsForSection;
> + SmallVector<CStringInfo> deadCStrings;
> };
>
> static MapInfo gatherMapInfo() {
> MapInfo info;
> for (InputFile *file : inputFiles)
> if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
> - bool hasEmittedSymbol = false;
> + uint32_t fileIndex = info.files.size() + 1;
> + bool isReferencedFile = false;
> +
> + // Gather the dead symbols. We don't have to bother with the live ones
> + // because we will pick them up as we iterate over the OutputSections
> + // later.
> for (Symbol *sym : file->symbols) {
> if (auto *d = dyn_cast_or_null<Defined>(sym))
> - if (d->isec && d->getFile() == file) {
> - if (d->isLive()) {
> - assert(!shouldOmitFromOutput(d->isec));
> - info.liveSymbols.push_back(d);
> - } else {
> + // Only emit the prevailing definition of a symbol. Also, don't emit
> + // the symbol if it is part of a cstring section (we use the literal
> + // value instead, similar to ld64)
> + if (d->isec && d->getFile() == file &&
> + !isa<CStringInputSection>(d->isec)) {
> + isReferencedFile = true;
> + if (!d->isLive())
> info.deadSymbols.push_back(d);
> + }
> + }
> +
> + // Gather all the cstrings (both live and dead). A CString(Output)Section
> + // doesn't provide us a way of figuring out which InputSections its
> + // cstring contents came from, so we need to build up that mapping here.
> + for (const Section *sec : file->sections) {
> + for (const Subsection &subsec : sec->subsections) {
> + if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
> + auto &liveCStrings = info.liveCStringsForSection[isec->parent];
> + for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
> + if (piece.live)
> + liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
> + {fileIndex, isec->getStringRef(i)}});
> + else
> + info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
> + isReferencedFile = true;
> }
> - hasEmittedSymbol = true;
> + } else {
> + break;
> }
> + }
> }
> - if (hasEmittedSymbol)
> - info.files.push_back(file);
> - }
> - parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
> - [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
> - return info;
> -}
>
> -// Construct a map from symbols to their stringified representations.
> -// Demangling symbols (which is what toString() does) is slow, so
> -// we do that in batch using parallel-for.
> -static DenseMap<Symbol *, std::string>
> -getSymbolStrings(ArrayRef<Defined *> syms) {
> - std::vector<std::string> str(syms.size());
> - parallelFor(0, syms.size(), [&](size_t i) {
> - raw_string_ostream os(str[i]);
> - Defined *sym = syms[i];
> -
> - switch (sym->isec->kind()) {
> - case InputSection::CStringLiteralKind: {
> - // Output "literal string: <string literal>"
> - const auto *isec = cast<CStringInputSection>(sym->isec);
> - const StringPiece &piece = isec->getStringPiece(sym->value);
> - assert(
> - sym->value == piece.inSecOff &&
> - "We expect symbols to always point to the start of a StringPiece.");
> - StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
> - (os << "literal string: ").write_escaped(str);
> - break;
> - }
> - case InputSection::ConcatKind:
> - case InputSection::WordLiteralKind:
> - os << toString(*sym);
> + if (isReferencedFile)
> + info.files.push_back(file);
> }
> - });
>
> - DenseMap<Symbol *, std::string> ret;
> - for (size_t i = 0, e = syms.size(); i < e; ++i)
> - ret[syms[i]] = std::move(str[i]);
> - return ret;
> + // cstrings are not stored in sorted order in their OutputSections, so we sort
> + // them here.
> + for (auto &liveCStrings : info.liveCStringsForSection)
> + parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
> + return p1.first < p2.first;
> + });
> + return info;
> }
>
> void macho::writeMapFile() {
> @@ -124,16 +132,12 @@ void macho::writeMapFile() {
> return;
> }
>
> - // Dump output path.
> os << format("# Path: %s\n", config->outputFile.str().c_str());
> -
> - // Dump output architecture.
> os << format("# Arch: %s\n",
> getArchitectureName(config->arch()).str().c_str());
>
> MapInfo info = gatherMapInfo();
>
> - // Dump table of object files.
> os << "# Object files:\n";
> os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
> uint32_t fileIndex = 1;
> @@ -143,7 +147,6 @@ void macho::writeMapFile() {
> readerToFileOrdinal[file] = fileIndex++;
> }
>
> - // Dump table of sections
> os << "# Sections:\n";
> os << "# Address\tSize \tSegment\tSection\n";
> for (OutputSegment *seg : outputSegments)
> @@ -155,28 +158,48 @@ void macho::writeMapFile() {
> seg->name.str().c_str(), osec->name.str().c_str());
> }
>
> - // Dump table of symbols
> - DenseMap<Symbol *, std::string> liveSymbolStrings =
> - getSymbolStrings(info.liveSymbols);
> os << "# Symbols:\n";
> os << "# Address\tSize \tFile Name\n";
> - for (Defined *sym : info.liveSymbols) {
> - assert(sym->isLive());
> - os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
> - readerToFileOrdinal[sym->getFile()],
> - liveSymbolStrings[sym].c_str());
> + for (const OutputSegment *seg : outputSegments) {
> + for (const OutputSection *osec : seg->getSections()) {
> + if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
> + for (const InputSection *isec : concatOsec->inputs) {
> + for (Defined *sym : isec->symbols)
> + os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
> + sym->size, readerToFileOrdinal[sym->getFile()],
> + sym->getName().str().data());
> + }
> + } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
> + const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
> + uint64_t lastAddr = 0; // strings will never start at address 0, so this
> + // is a sentinel value
> + for (const auto &[addr, info] : liveCStrings) {
> + uint64_t size = 0;
> + if (addr != lastAddr)
> + size = info.str.size() + 1; // include null terminator
> + lastAddr = addr;
> + os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
> + info.fileIndex);
> + os.write_escaped(info.str) << "\n";
> + }
> + }
> + // TODO print other synthetic sections
> + }
> }
>
> if (config->deadStrip) {
> - DenseMap<Symbol *, std::string> deadSymbolStrings =
> - getSymbolStrings(info.deadSymbols);
> os << "# Dead Stripped Symbols:\n";
> os << "# \tSize \tFile Name\n";
> for (Defined *sym : info.deadSymbols) {
> assert(!sym->isLive());
> os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
> readerToFileOrdinal[sym->getFile()],
> - deadSymbolStrings[sym].c_str());
> + sym->getName().str().data());
> + }
> + for (CStringInfo &cstrInfo : info.deadCStrings) {
> + os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
> + cstrInfo.str.size() + 1, cstrInfo.fileIndex);
> + os.write_escaped(cstrInfo.str) << "\n";
> }
> }
> }
>
> diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
> index ac5ae9d02074..fe1ef88604e1 100644
> --- a/lld/test/MachO/map-file.s
> +++ b/lld/test/MachO/map-file.s
> @@ -4,23 +4,24 @@
> # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
> # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
>
> -# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
> +# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
> +# RUN: --time-trace -o %t/test
> # RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
> -# RUN: cat %t/objdump %t/map > %t/out
> -# RUN: FileCheck %s < %t/out
> +## Check that symbols in cstring sections aren't emitted
> +# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
> # RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
>
> # CHECK: Sections:
> -# CHECK-NEXT: Idx Name Size VMA Type
> -# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
> -# CHECK-NEXT: 1 obj {{[0-9a-f]+}} [[#%x,DATA:]] TEXT
> -# CHECK-NEXT: 2 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
> -# CHECK-NEXT: 3 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
> +# CHECK-NEXT: Idx Name Size VMA Type
> +# CHECK-NEXT: 0 __text {{[0-9a-f]+}} [[#%x,TEXT:]] TEXT
> +# CHECK-NEXT: 1 __cstring {{[0-9a-f]+}} [[#%x,CSTR:]] DATA
> +# CHECK-NEXT: 2 __common {{[0-9a-f]+}} [[#%x,BSS:]] BSS
>
> # CHECK: SYMBOL TABLE:
> # CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main
> # CHECK-DAG: [[#%x,NUMBER:]] g O __DATA,__common _number
> -# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,obj _foo
> +# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar
> +# CHECK-DAG: [[#%x,FOO:]] g F __TEXT,__text __ZTIN3foo3bar4MethE
> # CHECK-DAG: [[#%x,HIWORLD:]] g O __TEXT,__cstring _hello_world
> # CHECK-DAG: [[#%x,HIITSME:]] g O __TEXT,__cstring _hello_its_me
>
> @@ -35,43 +36,50 @@
> # CHECK-NEXT: # Sections:
> # CHECK-NEXT: # Address Size Segment Section
> # CHECK-NEXT: 0x[[#%X,TEXT]] 0x{{[0-9A-F]+}} __TEXT __text
> -# CHECK-NEXT: 0x[[#%X,DATA]] 0x{{[0-9A-F]+}} __TEXT obj
> # CHECK-NEXT: 0x[[#%X,CSTR]] 0x{{[0-9A-F]+}} __TEXT __cstring
> # CHECK-NEXT: 0x[[#%X,BSS]] 0x{{[0-9A-F]+}} __DATA __common
>
> # CHECK-NEXT: # Symbols:
> -# CHECK-NEXT: # Address Size File Name
> -# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
> -# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] _foo
> -# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
> -# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
> -# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
> +# CHECK-NEXT: # Address Size File Name
> +# CHECK-DAG: 0x[[#%X,MAIN]] 0x00000001 [ 1] _main
> +# CHECK-DAG: 0x[[#%X,BAR]] 0x00000001 [ 1] _bar
> +# CHECK-DAG: 0x[[#%X,FOO]] 0x00000001 [ 2] __ZTIN3foo3bar4MethE
> +# CHECK-DAG: 0x[[#%X,HIWORLD]] 0x0000000E [ 3] literal string: Hello world!\n
> +# CHECK-DAG: 0x[[#%X,HIITSME]] 0x0000000F [ 3] literal string: Hello, it's me
> +# CHECK-DAG: 0x[[#%X,HIITSME + 0xf]] 0x0000000E [ 3] literal string: Hello world!\n
> +# CHECK-DAG: 0x[[#%X,NUMBER]] 0x00000001 [ 1] _number
>
> # MAPFILE: "name":"Total Write map file"
>
> -# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
> +# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
> # RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
>
> ## C-string literals should be printed as "literal string: <C string literal>"
> # STRIPPED-LABEL: Dead Stripped Symbols:
> -# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] _foo
> -# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
> -# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
> -# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
> +# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _bar
> +# STRIPPED-DAG: <<dead>> 0x00000001 [ 1] _number
> +# STRIPPED-DAG: <<dead>> 0x00000001 [ 2] __ZTIN3foo3bar4MethE
> +# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
> +# STRIPPED-DAG: <<dead>> 0x0000000F [ 3] literal string: Hello, it's me
> +# STRIPPED-DAG: <<dead>> 0x0000000E [ 3] literal string: Hello world!\n
>
> # RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
> # RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
>
> +## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
> +## folded symbols but not folded cstrings; we print both.
> +
> # ICF: Symbols:
> -# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] _foo
> -# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
> +# ICF-DAG: 0x[[#%X,FOO:]] 0x00000000 [ 2] __ZTIN3foo3bar4MethE
> +# ICF-DAG: 0x[[#FOO]] 0x00000001 [ 1] _bar
> +# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E [ 3] literal string: Hello world!\n
> +# ICF-DAG: 0x[[#%X,HIWORLD]] 0x00000000 [ 3] literal string: Hello world!\n
>
> #--- foo.s
> -## ICF will only fold sections marked as pure_instructions
> -.section __TEXT,obj,regular,pure_instructions
> -.globl _foo
> -.alt_entry _alt_foo
> -_foo:
> +.globl __ZTIN3foo3bar4MethE
> +## This C++ symbol makes it clear that we do not print the demangled name in
> +## the map file, even if `-demangle` is passed.
> +__ZTIN3foo3bar4MethE:
> nop
>
> .subsections_via_symbols
> @@ -79,12 +87,10 @@ _foo:
> #--- test.s
> .comm _number, 1
> .globl _main, _bar
> -.alt_entry _alt_bar
>
> _main:
> ret
>
> -.section __TEXT,obj,regular,pure_instructions
> _bar:
> nop
>
> @@ -101,4 +107,6 @@ _hello_world:
> _hello_its_me:
> .asciz "Hello, it's me"
>
> +.asciz "Hello world!\n"
> +
> .subsections_via_symbols
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list