[lld] 38d6202 - Revert "[lld-macho] Overhaul map file code"

Muhammad Omair Javaid via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 17 00:13:45 PST 2022


Author: Muhammad Omair Javaid
Date: 2022-11-17T12:13:13+04:00
New Revision: 38d6202a425462ce5923d038bc54532115a80a1f

URL: https://github.com/llvm/llvm-project/commit/38d6202a425462ce5923d038bc54532115a80a1f
DIFF: https://github.com/llvm/llvm-project/commit/38d6202a425462ce5923d038bc54532115a80a1f.diff

LOG: Revert "[lld-macho] Overhaul map file code"

This reverts commit 213dbdbef0bad835abca0753f9e59b17dc2bcde2.
This patch series breaks lld:map-file.s on the ARMv7 Linux buildbots,
e.g. https://lab.llvm.org/buildbot/#/builders/178/builds/3190

Added: 
    

Modified: 
    lld/MachO/MapFile.cpp
    lld/test/MachO/map-file.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 5d6c87baba9f..8f1b6a13330a 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -6,10 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the -map option, which maps address ranges to their
-// respective contents, plus the input file these contents were originally from.
-// The contents (typically symbols) are listed in address order. Dead-stripped
-// contents are included as well.
+// This file implements the -map option. It shows lists in order and
+// hierarchically the outputFile, arch, input files, output sections and
+// symbols:
 //
 // # Path: test
 // # Arch: x86_84
@@ -29,16 +28,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "MapFile.h"
-#include "ConcatOutputSection.h"
 #include "Config.h"
 #include "InputFiles.h"
 #include "InputSection.h"
+#include "OutputSection.h"
 #include "OutputSegment.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "lld/Common/ErrorHandler.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/TimeProfiler.h"
 
@@ -47,77 +45,71 @@ using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
-struct CStringInfo {
-  uint32_t fileIndex;
-  StringRef str;
-};
-
 struct MapInfo {
   SmallVector<InputFile *> files;
+  SmallVector<Defined *> liveSymbols;
   SmallVector<Defined *> deadSymbols;
-  DenseMap<const OutputSection *,
-           SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
-      liveCStringsForSection;
-  SmallVector<CStringInfo> deadCStrings;
 };
 
 static MapInfo gatherMapInfo() {
   MapInfo info;
   for (InputFile *file : inputFiles)
     if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
-      uint32_t fileIndex = info.files.size() + 1;
-      bool isReferencedFile = false;
-
-      // Gather the dead symbols. We don't have to bother with the live ones
-      // because we will pick them up as we iterate over the OutputSections
-      // later.
+      bool hasEmittedSymbol = false;
       for (Symbol *sym : file->symbols) {
         if (auto *d = dyn_cast_or_null<Defined>(sym))
-          // Only emit the prevailing definition of a symbol. Also, don't emit
-          // the symbol if it is part of a cstring section (we use the literal
-          // value instead, similar to ld64)
-          if (d->isec && d->getFile() == file &&
-              !isa<CStringInputSection>(d->isec)) {
-            isReferencedFile = true;
-            if (!d->isLive())
+          if (d->isec && d->getFile() == file) {
+            if (d->isLive()) {
+              assert(!shouldOmitFromOutput(d->isec));
+              info.liveSymbols.push_back(d);
+            } else {
               info.deadSymbols.push_back(d);
-          }
-      }
-
-      // Gather all the cstrings (both live and dead). A CString(Output)Section
-      // doesn't provide us a way of figuring out which InputSections its
-      // cstring contents came from, so we need to build up that mapping here.
-      for (const Section *sec : file->sections) {
-        for (const Subsection &subsec : sec->subsections) {
-          if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) {
-            auto &liveCStrings = info.liveCStringsForSection[isec->parent];
-            for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
-              if (piece.live)
-                liveCStrings.push_back({isec->parent->addr + piece.outSecOff,
-                                        {fileIndex, isec->getStringRef(i)}});
-              else
-                info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)});
-              isReferencedFile = true;
             }
-          } else {
-            break;
+            hasEmittedSymbol = true;
           }
-        }
       }
-
-      if (isReferencedFile)
+      if (hasEmittedSymbol)
         info.files.push_back(file);
     }
-
-  // cstrings are not stored in sorted order in their OutputSections, so we sort
-  // them here.
-  for (auto &liveCStrings : info.liveCStringsForSection)
-    parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) {
-      return p1.first < p2.first;
-    });
+  parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
+               [](Defined *a, Defined *b) { return a->getVA() < b->getVA(); });
   return info;
 }
 
+// Construct a map from symbols to their stringified representations.
+// Demangling symbols (which is what toString() does) is slow, so
+// we do that in batch using parallel-for.
+static DenseMap<Symbol *, std::string>
+getSymbolStrings(ArrayRef<Defined *> syms) {
+  std::vector<std::string> str(syms.size());
+  parallelFor(0, syms.size(), [&](size_t i) {
+    raw_string_ostream os(str[i]);
+    Defined *sym = syms[i];
+
+    switch (sym->isec->kind()) {
+    case InputSection::CStringLiteralKind: {
+      // Output "literal string: <string literal>"
+      const auto *isec = cast<CStringInputSection>(sym->isec);
+      const StringPiece &piece = isec->getStringPiece(sym->value);
+      assert(
+          sym->value == piece.inSecOff &&
+          "We expect symbols to always point to the start of a StringPiece.");
+      StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
+      (os << "literal string: ").write_escaped(str);
+      break;
+    }
+    case InputSection::ConcatKind:
+    case InputSection::WordLiteralKind:
+      os << toString(*sym);
+    }
+  });
+
+  DenseMap<Symbol *, std::string> ret;
+  for (size_t i = 0, e = syms.size(); i < e; ++i)
+    ret[syms[i]] = std::move(str[i]);
+  return ret;
+}
+
 void macho::writeMapFile() {
   if (config->mapFile.empty())
     return;
@@ -132,12 +124,16 @@ void macho::writeMapFile() {
     return;
   }
 
+  // Dump output path.
   os << format("# Path: %s\n", config->outputFile.str().c_str());
+
+  // Dump output architecture.
   os << format("# Arch: %s\n",
                getArchitectureName(config->arch()).str().c_str());
 
   MapInfo info = gatherMapInfo();
 
+  // Dump table of object files.
   os << "# Object files:\n";
   os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
   uint32_t fileIndex = 1;
@@ -147,6 +143,7 @@ void macho::writeMapFile() {
     readerToFileOrdinal[file] = fileIndex++;
   }
 
+  // Dump table of sections
   os << "# Sections:\n";
   os << "# Address\tSize    \tSegment\tSection\n";
   for (OutputSegment *seg : outputSegments)
@@ -158,48 +155,28 @@ void macho::writeMapFile() {
                    seg->name.str().c_str(), osec->name.str().c_str());
     }
 
+  // Dump table of symbols
+  DenseMap<Symbol *, std::string> liveSymbolStrings =
+      getSymbolStrings(info.liveSymbols);
   os << "# Symbols:\n";
   os << "# Address\tSize    \tFile  Name\n";
-  for (const OutputSegment *seg : outputSegments) {
-    for (const OutputSection *osec : seg->getSections()) {
-      if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
-        for (const InputSection *isec : concatOsec->inputs) {
-          for (Defined *sym : isec->symbols)
-            os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
-                         sym->size, readerToFileOrdinal[sym->getFile()],
-                         sym->getName().str().data());
-        }
-      } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
-        const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
-        uint64_t lastAddr = 0; // strings will never start at address 0, so this
-                               // is a sentinel value
-        for (const auto &[addr, info] : liveCStrings) {
-          uint64_t size = 0;
-          if (addr != lastAddr)
-            size = info.str.size() + 1; // include null terminator
-          lastAddr = addr;
-          os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
-                       info.fileIndex);
-          os.write_escaped(info.str) << "\n";
-        }
-      }
-      // TODO print other synthetic sections
-    }
+  for (Defined *sym : info.liveSymbols) {
+    assert(sym->isLive());
+    os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
+                 readerToFileOrdinal[sym->getFile()],
+                 liveSymbolStrings[sym].c_str());
   }
 
   if (config->deadStrip) {
+    DenseMap<Symbol *, std::string> deadSymbolStrings =
+        getSymbolStrings(info.deadSymbols);
     os << "# Dead Stripped Symbols:\n";
     os << "#        \tSize    \tFile  Name\n";
     for (Defined *sym : info.deadSymbols) {
       assert(!sym->isLive());
       os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
                    readerToFileOrdinal[sym->getFile()],
-                   sym->getName().str().data());
-    }
-    for (CStringInfo &cstrInfo : info.deadCStrings) {
-      os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
-                   cstrInfo.str.size() + 1, cstrInfo.fileIndex);
-      os.write_escaped(cstrInfo.str) << "\n";
+                   deadSymbolStrings[sym].c_str());
     }
   }
 }

diff  --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
index fe1ef88604e1..ac5ae9d02074 100644
--- a/lld/test/MachO/map-file.s
+++ b/lld/test/MachO/map-file.s
@@ -4,24 +4,23 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/c-string-literal.s -o %t/c-string-literal.o
 
-# RUN: %lld -demangle -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o \
-# RUN:   --time-trace -o %t/test
+# RUN: %lld -map %t/map %t/test.o %t/foo.o %t/c-string-literal.o --time-trace -o %t/test
 # RUN: llvm-objdump --syms --section-headers %t/test > %t/objdump
-## Check that symbols in cstring sections aren't emitted
-# RUN: cat %t/objdump %t/map | FileCheck %s --implicit-check-not _hello_world
+# RUN: cat %t/objdump %t/map > %t/out
+# RUN: FileCheck %s < %t/out
 # RUN: FileCheck %s --check-prefix=MAPFILE < %t/test.time-trace
 
 # CHECK:      Sections:
-# CHECK-NEXT: Idx  Name          Size           VMA               Type
-# CHECK-NEXT: 0    __text        {{[0-9a-f]+}}  [[#%x,TEXT:]]     TEXT
-# CHECK-NEXT: 1    __cstring     {{[0-9a-f]+}}  [[#%x,CSTR:]]     DATA
-# CHECK-NEXT: 2    __common      {{[0-9a-f]+}}  [[#%x,BSS:]]      BSS
+# CHECK-NEXT: Idx  Name          Size           VMA           Type
+# CHECK-NEXT: 0    __text        {{[0-9a-f]+}}  [[#%x,TEXT:]] TEXT
+# CHECK-NEXT: 1    obj           {{[0-9a-f]+}}  [[#%x,DATA:]] TEXT
+# CHECK-NEXT: 2    __cstring     {{[0-9a-f]+}}  [[#%x,CSTR:]] DATA
+# CHECK-NEXT: 3    __common      {{[0-9a-f]+}}  [[#%x,BSS:]]  BSS
 
 # CHECK:      SYMBOL TABLE:
 # CHECK-DAG:  [[#%x,MAIN:]]    g     F __TEXT,__text _main
 # CHECK-DAG:  [[#%x,NUMBER:]]  g     O __DATA,__common _number
-# CHECK-DAG:  [[#%x,BAR:]]     g     F __TEXT,__text _bar
-# CHECK-DAG:  [[#%x,FOO:]]     g     F __TEXT,__text __ZTIN3foo3bar4MethE
+# CHECK-DAG:  [[#%x,FOO:]]     g     F __TEXT,obj _foo
 # CHECK-DAG:  [[#%x,HIWORLD:]] g     O __TEXT,__cstring _hello_world
 # CHECK-DAG:  [[#%x,HIITSME:]] g     O __TEXT,__cstring _hello_its_me
 
@@ -36,50 +35,43 @@
 # CHECK-NEXT: # Sections:
 # CHECK-NEXT: # Address       Size              Segment  Section
 # CHECK-NEXT: 0x[[#%X,TEXT]]  0x{{[0-9A-F]+}}   __TEXT   __text
+# CHECK-NEXT: 0x[[#%X,DATA]]  0x{{[0-9A-F]+}}   __TEXT   obj
 # CHECK-NEXT: 0x[[#%X,CSTR]]  0x{{[0-9A-F]+}}   __TEXT   __cstring
 # CHECK-NEXT: 0x[[#%X,BSS]]   0x{{[0-9A-F]+}}   __DATA   __common
 
 # CHECK-NEXT: # Symbols:
-# CHECK-NEXT: # Address                Size        File   Name
-# CHECK-DAG:  0x[[#%X,MAIN]]           0x00000001  [  1]  _main
-# CHECK-DAG:  0x[[#%X,BAR]]            0x00000001  [  1]  _bar
-# CHECK-DAG:  0x[[#%X,FOO]]            0x00000001  [  2]  __ZTIN3foo3bar4MethE
-# CHECK-DAG:  0x[[#%X,HIWORLD]]        0x0000000E  [  3]  literal string: Hello world!\n
-# CHECK-DAG:  0x[[#%X,HIITSME]]        0x0000000F  [  3]  literal string: Hello, it's me
-# CHECK-DAG:  0x[[#%X,HIITSME + 0xf]]  0x0000000E  [  3]  literal string: Hello world!\n
-# CHECK-DAG:  0x[[#%X,NUMBER]]         0x00000001  [  1]  _number
+# CHECK-NEXT: # Address           Size        File   Name
+# CHECK-DAG:  0x[[#%X,MAIN]]      0x00000001  [  1]  _main
+# CHECK-DAG:  0x[[#%X,FOO]]       0x00000001  [  2]  _foo
+# CHECK-DAG:  0x[[#%X,HIWORLD]]   0x0000000E  [  3]  literal string: Hello world!\n
+# CHECK-DAG:  0x[[#%X,HIITSME]]   0x0000000F  [  3]  literal string: Hello, it's me
+# CHECK-DAG:  0x[[#%X,NUMBER]]    0x00000001  [  1]  _number
 
 # MAPFILE: "name":"Total Write map file"
 
-# RUN: %lld -demangle -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
+# RUN: %lld -dead_strip -map %t/stripped-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/stripped
 # RUN: FileCheck --check-prefix=STRIPPED %s < %t/stripped-map
 
 ## C-string literals should be printed as "literal string: <C string literal>"
 # STRIPPED-LABEL: Dead Stripped Symbols:
-# STRIPPED-DAG:   <<dead>>	0x00000001	[  1] _bar
-# STRIPPED-DAG:   <<dead>>	0x00000001	[  1] _number
-# STRIPPED-DAG:   <<dead>>	0x00000001	[  2] __ZTIN3foo3bar4MethE
-# STRIPPED-DAG:   <<dead>>	0x0000000E	[  3] literal string: Hello world!\n
-# STRIPPED-DAG:   <<dead>>	0x0000000F	[  3] literal string: Hello, it's me
-# STRIPPED-DAG:   <<dead>>	0x0000000E	[  3] literal string: Hello world!\n
+# STRIPPED-DAG:   <<dead>> 0x00000001 [  2] _foo
+# STRIPPED-DAG:   <<dead>> 0x0000000E [  3] literal string: Hello world!\n
+# STRIPPED-DAG:   <<dead>> 0x0000000F [  3] literal string: Hello, it's me
+# STRIPPED-DAG:   <<dead>> 0x00000001 [  1] _number
 
 # RUN: %lld --icf=all -map %t/icf-map %t/test.o %t/foo.o %t/c-string-literal.o -o %t/icf
 # RUN: FileCheck --check-prefix=ICF %s < %t/icf-map
 
-## Verify that folded symbols and cstrings have size zero. Note that ld64 prints
-## folded symbols but not folded cstrings; we print both.
-
 # ICF:     Symbols:
-# ICF-DAG: 0x[[#%X,FOO:]]     0x00000000  [  2] __ZTIN3foo3bar4MethE
-# ICF-DAG: 0x[[#FOO]]         0x00000001  [  1] _bar
-# ICF-DAG: 0x[[#%X,HIWORLD:]] 0x0000000E  [  3]  literal string: Hello world!\n
-# ICF-DAG: 0x[[#%X,HIWORLD]]  0x00000000  [  3]  literal string: Hello world!\n
+# ICF-DAG: 0x[[#%X,FOO:]]  0x00000000  [  2] _foo
+# ICF-DAG: 0x[[#FOO]]      0x00000001  [  1] _bar
 
 #--- foo.s
-.globl __ZTIN3foo3bar4MethE
-## This C++ symbol makes it clear that we do not print the demangled name in
-## the map file, even if `-demangle` is passed.
-__ZTIN3foo3bar4MethE:
+## ICF will only fold sections marked as pure_instructions
+.section __TEXT,obj,regular,pure_instructions
+.globl _foo
+.alt_entry _alt_foo
+_foo:
   nop
 
 .subsections_via_symbols
@@ -87,10 +79,12 @@ __ZTIN3foo3bar4MethE:
 #--- test.s
 .comm _number, 1
 .globl _main, _bar
+.alt_entry _alt_bar
 
 _main:
   ret
 
+.section __TEXT,obj,regular,pure_instructions
 _bar:
   nop
 
@@ -107,6 +101,4 @@ _hello_world:
 _hello_its_me:
 .asciz "Hello, it's me"
 
-.asciz "Hello world!\n"
-
 .subsections_via_symbols


        


More information about the llvm-commits mailing list