[lld] 7ca32bd - Reland "[lld-macho] Overhaul map file code"

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 5 13:59:24 PST 2022


Reminder to please always mention the reason for the revert in the
commit message.

On Tue, Dec 6, 2022 at 12:57 AM Jez Ng via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Jez Ng
> Date: 2022-12-05T16:57:35-05:00
> New Revision: 7ca32bd402ddc31ace69d0a85362240c961b4f1d
>
> URL: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d
> DIFF: https://github.com/llvm/llvm-project/commit/7ca32bd402ddc31ace69d0a85362240c961b4f1d.diff
>
> LOG: Reland "[lld-macho] Overhaul map file code"
>
> This reverts commit 38d6202a425462ce5923d038bc54532115a80a1f.
>
> Differential Revision: https://reviews.llvm.org/D137368
>
> Added:
>
>
> Modified:
>     lld/MachO/MapFile.cpp
>     lld/test/MachO/map-file.s
>
> Removed:
>
>
>
> ################################################################################
> diff  --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
> index 8f1b6a13330a..5d6c87baba9f 100644
> --- a/lld/MachO/MapFile.cpp
> +++ b/lld/MachO/MapFile.cpp
> @@ -6,9 +6,10 @@
>  //
>  //===----------------------------------------------------------------------===//
>  //
> -// This file implements the -map option. It shows lists in order and
> -// hierarchically the outputFile, arch, input files, output sections and
> -// symbols:
> +// This file implements the -map option, which maps address ranges to their
> +// respective contents, plus the input file these contents were originally from.
> +// The contents (typically symbols) are listed in address order. Dead-stripped
> +// contents are included as well.
>  //
>  // # Path: test
>  // # Arch: x86_64
> @@ -28,15 +29,16 @@
>  //===----------------------------------------------------------------------===//
>
>  #include "MapFile.h"
> +#include "ConcatOutputSection.h"
>  #include "Config.h"
>  #include "InputFiles.h"
>  #include "InputSection.h"
> -#include "OutputSection.h"
>  #include "OutputSegment.h"
>  #include "Symbols.h"
>  #include "SyntheticSections.h"
>  #include "Target.h"
>  #include "lld/Common/ErrorHandler.h"
> +#include "llvm/ADT/DenseMap.h"
>  #include "llvm/Support/Parallel.h"
>  #include "llvm/Support/TimeProfiler.h"
>
> @@ -45,69 +47,75 @@ using namespace llvm::sys;
>  using namespace lld;
>  using namespace lld::macho;
>
> +struct CStringInfo {
> +  uint32_t fileIndex;
> +  StringRef str;
> +};
> +
>  struct MapInfo {
>    SmallVector<InputFile *> files;
> -  SmallVector<Defined *> liveSymbols;
>    SmallVector<Defined *> deadSymbols;
> +  DenseMap<const OutputSection *,
> +           SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
> +      liveCStringsForSection;
> +  SmallVector<CStringInfo> deadCStrings;
>  };
>
>  static MapInfo gatherMapInfo() {
>    MapInfo info;
>    for (InputFile *file : inputFiles)
>      if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
> -      bool hasEmittedSymbol = false;
> +      uint32_t fileIndex = info.files.size() + 1;
> +      bool isReferencedFile = false;
> +
> +      // Gather the dead symbols. We don't have to bother with the live ones
> +      // because we will pick them up as we iterate over the OutputSections
> +      // later.
>        for (Symbol *sym : file->symbols) {
>          if (auto *d = dyn_cast_or_null<Defined>(sym))
> -          if (d->isec && d->getFile() == file) {
> -            if (d->isLive()) {
> -              assert(!shouldOmitFromOutput(d->isec));
> -              info.liveSymbols.push_back(d);
> -            } else {
> +          // Only emit the prevailing definition of a symbol. Also, don't emit
> +          // the symbol if it is part of a cstring section (we use the literal
> +          // value instead, similar to ld64)
> +          if (d->isec && d->getFile() == file &&
> +              !isa<CStringInputSection>(d->isec)) {
> +            isReferencedFile = true;
> +            if (!d->isLive())
>                info.deadSymbols.push_back(d);
> +          }
> +      }
> +
> +      // Gather all the cstrings (both live and dead). A CString(Output)Section
> +      // doesn't provide us a way of figuring out which InputSections its
> +      // cstring contents came from, so we need to build up that mapping here.
> +      for (const Section *sec : file->sections) {
> +        for (const Subsection &subsec : sec->subsections) {
> +          if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
> +            auto &liveCStrings = info.liveCStringsForSection[isec->parent];
> +            for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
> +              if (piece.live)
> +                liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
> +                                        {fileIndex, isec->getStringRef(i)}});
> +              else
> +                info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
> +              isReferencedFile = true;
>              }
> -            hasEmittedSymbol = true;
> +          } else {
> +            break;
>            }
> +        }
>        }
> -      if (hasEmittedSymbol)
> -        info.files.push_back(file);
> -    }
> -  parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
> -               [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
> -  return info;
> -}
>
> -// Construct a map from symbols to their stringified representations.
> -// Demangling symbols (which is what toString() does) is slow, so
> -// we do that in batch using parallel-for.
> -static DenseMap<Symbol *, std::string>
> -getSymbolStrings(ArrayRef<Defined *> syms) {
> -  std::vector<std::string> str(syms.size());
> -  parallelFor(0, syms.size(), [&](size_t i) {
> -    raw_string_ostream os(str[i]);
> -    Defined *sym = syms[i];
> -
> -    switch (sym->isec->kind()) {
> -    case InputSection::CStringLiteralKind: {
> -      // Output "literal string: <string literal>"
> -      const auto *isec = cast<CStringInputSection>(sym->isec);
> -      const StringPiece &piece = isec->getStringPiece(sym->value);
> -      assert(
> -          sym->value == piece.inSecOff &&
> -          "We expect symbols to always point to the start of a StringPiece.");
> -      StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
> -      (os << "literal string: ").write_escaped(str);
> -      break;
> -    }
> -    case InputSection::ConcatKind:
> -    case InputSection::WordLiteralKind:
> -      os << toString(*sym);
> +      if (isReferencedFile)
> +        info.files.push_back(file);
>      }
> -  });
>
> -  DenseMap<Symbol *, std::string> ret;
> -  for (size_t i = 0, e = syms.size(); i < e; ++i)
> -    ret[syms[i]] = std::move(str[i]);
> -  return ret;
> +  // cstrings are not stored in sorted order in their OutputSections, so we sort
> +  // them here.
> +  for (auto &liveCStrings : info.liveCStringsForSection)
> +    parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
> +      return p1.first < p2.first;
> +    });
> +  return info;
>  }
>
>  void macho::writeMapFile() {
> @@ -124,16 +132,12 @@ void macho::writeMapFile() {
>      return;
>    }
>
> -  // Dump output path.
>    os << format("# Path: %s\n", config->outputFile.str().c_str());
> -
> -  // Dump output architecture.
>    os << format("# Arch: %s\n",
>                 getArchitectureName(config->arch()).str().c_str());
>
>    MapInfo info = gatherMapInfo();
>
> -  // Dump table of object files.
>    os << "# Object files:\n";
>    os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
>    uint32_t fileIndex = 1;
> @@ -143,7 +147,6 @@ void macho::writeMapFile() {
>      readerToFileOrdinal[file] = fileIndex++;
>    }
>
> -  // Dump table of sections
>    os << "# Sections:\n";
>    os << "# Address\tSize    \tSegment\tSection\n";
>    for (OutputSegment *seg : outputSegments)
> @@ -155,28 +158,48 @@ void macho::writeMapFile() {
>                     seg->name.str().c_str(), osec->name.str().c_str());
>      }
>
> -  // Dump table of symbols
> -  DenseMap<Symbol *, std::string> liveSymbolStrings =
> -      getSymbolStrings(info.liveSymbols);
>    os << "# Symbols:\n";
>    os << "# Address\tSize    \tFile  Name\n";
> -  for (Defined *sym : info.liveSymbols) {
> -    assert(sym->isLive());
> -    os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
> -                 readerToFileOrdinal[sym->getFile()],
> -                 liveSymbolStrings[sym].c_str());
> +  for (const OutputSegment *seg : outputSegments) {
> +    for (const OutputSection *osec : seg->getSections()) {
> +      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
> +        for (const InputSection *isec : concatOsec->inputs) {
> +          for (Defined *sym : isec->symbols)
> +            os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
> +                         sym->size, readerToFileOrdinal[sym->getFile()],
> +                         sym->getName().str().data());
> +        }
> +      } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
> +        const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
> +        uint64_t lastAddr = 0; // strings will never start at address 0, so this
> +                               // is a sentinel value
> +        for (const auto &[addr, info] : liveCStrings) {
> +          uint64_t size = 0;
> +          if (addr != lastAddr)
> +            size = info.str.size() + 1; // include null terminator
> +          lastAddr = addr;
> +          os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
> +                       info.fileIndex);
> +          os.write_escaped(info.str) << "\n";
> +        }
> +      }
> +      // TODO print other synthetic sections
> +    }
>    }
>
>    if (config->deadStrip) {
> -    DenseMap<Symbol *, std::string> deadSymbolStrings =
> -        getSymbolStrings(info.deadSymbols);
>      os << "# Dead Stripped Symbols:\n";
>      os << "#        \tSize    \tFile  Name\n";
>      for (Defined *sym : info.deadSymbols) {
>        assert(!sym->isLive());
>        os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
>                     readerToFileOrdinal[sym->getFile()],
> -                   deadSymbolStrings[sym].c_str());
> +                   sym->getName().str().data());
> +    }
> +    for (CStringInfo &cstrInfo : info.deadCStrings) {
> +      os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
> +                   cstrInfo.str.size() + 1, cstrInfo.fileIndex);
> +      os.write_escaped(cstrInfo.str) << "\n";
>      }
>    }
>  }
>
> diff  --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
> index ac5ae9d02074..fe1ef88604e1 100644
> --- a/lld/test/MachO/map-file.s
> +++ b/lld/test/MachO/map-file.s
> @@ -4,23 +4,24 @@
>  # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
>  # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
>
> -# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
> +# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
> +# RUN:   --time-trace -o %t/test
>  # RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
> -# RUN: cat %t/objdump %t/map > %t/out
> -# RUN: FileCheck %s < %t/out
> +## Check that symbols in cstring sections aren't emitted
> +# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
>  # RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
>
>  # CHECK:      Sections:
> -# CHECK-NEXT: Idx  Name          Size           VMA           Type
> -# CHECK-NEXT: 0    __text        {{[0-9a-f]+}}  [[#%x,TEXT:]] TEXT
> -# CHECK-NEXT: 1    obj           {{[0-9a-f]+}}  [[#%x,DATA:]] TEXT
> -# CHECK-NEXT: 2    __cstring     {{[0-9a-f]+}}  [[#%x,CSTR:]] DATA
> -# CHECK-NEXT: 3    __common      {{[0-9a-f]+}}  [[#%x,BSS:]]  BSS
> +# CHECK-NEXT: Idx  Name          Size           VMA               Type
> +# CHECK-NEXT: 0    __text        {{[0-9a-f]+}}  [[#%x,TEXT:]]     TEXT
> +# CHECK-NEXT: 1    __cstring     {{[0-9a-f]+}}  [[#%x,CSTR:]]     DATA
> +# CHECK-NEXT: 2    __common      {{[0-9a-f]+}}  [[#%x,BSS:]]      BSS
>
>  # CHECK:      SYMBOL TABLE:
>  # CHECK-DAG:  [[#%x,MAIN:]]    g     F __TEXT,__text _main
>  # CHECK-DAG:  [[#%x,NUMBER:]]  g     O __DATA,__common _number
> -# CHECK-DAG:  [[#%x,FOO:]]     g     F __TEXT,obj _foo
> +# CHECK-DAG:  [[#%x,BAR:]]     g     F __TEXT,__text _bar
> +# CHECK-DAG:  [[#%x,FOO:]]     g     F __TEXT,__text __ZTIN3foo3bar4MethE
>  # CHECK-DAG:  [[#%x,HIWORLD:]] g     O __TEXT,__cstring _hello_world
>  # CHECK-DAG:  [[#%x,HIITSME:]] g     O __TEXT,__cstring _hello_its_me
>
> @@ -35,43 +36,50 @@
>  # CHECK-NEXT: # Sections:
>  # CHECK-NEXT: # Address       Size              Segment  Section
>  # CHECK-NEXT: 0x[[#%X,TEXT]]  0x{{[0-9A-F]+}}   __TEXT   __text
> -# CHECK-NEXT: 0x[[#%X,DATA]]  0x{{[0-9A-F]+}}   __TEXT   obj
>  # CHECK-NEXT: 0x[[#%X,CSTR]]  0x{{[0-9A-F]+}}   __TEXT   __cstring
>  # CHECK-NEXT: 0x[[#%X,BSS]]   0x{{[0-9A-F]+}}   __DATA   __common
>
>  # CHECK-NEXT: # Symbols:
> -# CHECK-NEXT: # Address           Size        File   Name
> -# CHECK-DAG:  0x[[#%X,MAIN]]      0x00000001  [  1]  _main
> -# CHECK-DAG:  0x[[#%X,FOO]]       0x00000001  [  2]  _foo
> -# CHECK-DAG:  0x[[#%X,HIWORLD]]   0x0000000E  [  3]  literal string: Hello world!\n
> -# CHECK-DAG:  0x[[#%X,HIITSME]]   0x0000000F  [  3]  literal string: Hello, it's me
> -# CHECK-DAG:  0x[[#%X,NUMBER]]    0x00000001  [  1]  _number
> +# CHECK-NEXT: # Address                Size        File   Name
> +# CHECK-DAG:  0x[[#%X,MAIN]]           0x00000001  [  1]  _main
> +# CHECK-DAG:  0x[[#%X,BAR]]            0x00000001  [  1]  _bar
> +# CHECK-DAG:  0x[[#%X,FOO]]            0x00000001  [  2]  __ZTIN3foo3bar4MethE
> +# CHECK-DAG:  0x[[#%X,HIWORLD]]        0x0000000E  [  3]  literal string: Hello world!\n
> +# CHECK-DAG:  0x[[#%X,HIITSME]]        0x0000000F  [  3]  literal string: Hello, it's me
> +# CHECK-DAG:  0x[[#%X,HIITSME + 0xf]]  0x0000000E  [  3]  literal string: Hello world!\n
> +# CHECK-DAG:  0x[[#%X,NUMBER]]         0x00000001  [  1]  _number
>
>  # MAPFILE: "name":"Total Write map file"
>
> -# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
> +# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
>  # RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
>
>  ## C-string literals should be printed as "literal string: <C string literal>"
>  # STRIPPED-LABEL: Dead Stripped Symbols:
> -# STRIPPED-DAG:   <<dead>> 0x00000001 [  2] _foo
> -# STRIPPED-DAG:   <<dead>> 0x0000000E [  3] literal string: Hello world!\n
> -# STRIPPED-DAG:   <<dead>> 0x0000000F [  3] literal string: Hello, it's me
> -# STRIPPED-DAG:   <<dead>> 0x00000001 [  1] _number
> +# STRIPPED-DAG:   <<dead>>     0x00000001      [  1] _bar
> +# STRIPPED-DAG:   <<dead>>     0x00000001      [  1] _number
> +# STRIPPED-DAG:   <<dead>>     0x00000001      [  2] __ZTIN3foo3bar4MethE
> +# STRIPPED-DAG:   <<dead>>     0x0000000E      [  3] literal string: Hello world!\n
> +# STRIPPED-DAG:   <<dead>>     0x0000000F      [  3] literal string: Hello, it's me
> +# STRIPPED-DAG:   <<dead>>     0x0000000E      [  3] literal string: Hello world!\n
>
>  # RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
>  # RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
>
> +## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
> +## folded symbols but not folded cstrings; we print both.
> +
>  # ICF:     Symbols:
> -# ICF-DAG: 0x[[#%X,FOO:]]  0x00000000  [  2] _foo
> -# ICF-DAG: 0x[[#FOO]]      0x00000001  [  1] _bar
> +# ICF-DAG: 0x[[#%X,FOO:]]     0x00000000  [  2] __ZTIN3foo3bar4MethE
> +# ICF-DAG: 0x[[#FOO]]         0x00000001  [  1] _bar
> +# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E  [  3]  literal string: Hello world!\n
> +# ICF-DAG: 0x[[#%X,HIWORLD]]  0x00000000  [  3]  literal string: Hello world!\n
>
>  #--- foo.s
> -## ICF will only fold sections marked as pure_instructions
> -.section __TEXT,obj,regular,pure_instructions
> -.globl _foo
> -.alt_entry _alt_foo
> -_foo:
> +.globl __ZTIN3foo3bar4MethE
> +## This C++ symbol makes it clear that we do not print the demangled name in
> +## the map file, even if `-demangle` is passed.
> +__ZTIN3foo3bar4MethE:
>    nop
>
>  .subsections_via_symbols
> @@ -79,12 +87,10 @@ _foo:
>  #--- test.s
>  .comm _number, 1
>  .globl _main, _bar
> -.alt_entry _alt_bar
>
>  _main:
>    ret
>
> -.section __TEXT,obj,regular,pure_instructions
>  _bar:
>    nop
>
> @@ -101,4 +107,6 @@ _hello_world:
>  _hello_its_me:
>  .asciz "Hello, it's me"
>
> +.asciz "Hello world!\n"
> +
>  .subsections_via_symbols
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list