[lld] b945733 - [lld-macho] Map file should map symbols to their original bitcode file

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 21 19:49:14 PDT 2022


Author: Jez Ng
Date: 2022-10-21T22:49:02-04:00
New Revision: b9457330266e12364e8949939f899135c41d88b3

URL: https://github.com/llvm/llvm-project/commit/b9457330266e12364e8949939f899135c41d88b3
DIFF: https://github.com/llvm/llvm-project/commit/b9457330266e12364e8949939f899135c41d88b3.diff

LOG: [lld-macho] Map file should map symbols to their original bitcode file

... instead of mapping them to the intermediate object file.
This matches ld64.

Reviewed By: #lld-macho, Roger

Differential Revision: https://reviews.llvm.org/D136380

Added: 
    lld/test/MachO/map-file.ll

Modified: 
    lld/MachO/LTO.cpp
    lld/MachO/MapFile.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index f465c41aec6c..d698e38611d6 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -108,7 +108,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
     // load the ObjFile emitted by LTO compilation.
     if (r.Prevailing)
       replaceSymbol<Undefined>(sym, sym->getName(), sym->getFile(),
-                               RefState::Strong);
+                               RefState::Strong, /*wasBitcodeSymbol=*/true);
 
     // TODO: set the other resolution configs properly
   }

diff  --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 41f72ebcb331..0a8053cd7871 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -45,32 +45,41 @@ using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
-using Symbols = std::vector<Defined *>;
-// Returns a pair where the left element is a container of all live Symbols and
-// the right element is a container of all dead symbols.
-static std::pair<Symbols, Symbols> getSymbols() {
-  Symbols liveSymbols, deadSymbols;
+struct MapInfo {
+  SmallVector<InputFile *> files;
+  SmallVector<Defined *> liveSymbols;
+  SmallVector<Defined *> deadSymbols;
+};
+
+static MapInfo gatherMapInfo() {
+  MapInfo info;
   for (InputFile *file : inputFiles)
-    if (isa<ObjFile>(file))
-      for (Symbol *sym : file->symbols)
+    if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
+      bool hasEmittedSymbol = false;
+      for (Symbol *sym : file->symbols) {
         if (auto *d = dyn_cast_or_null<Defined>(sym))
           if (d->isec && d->getFile() == file) {
             if (d->isLive()) {
               assert(!shouldOmitFromOutput(d->isec));
-              liveSymbols.push_back(d);
+              info.liveSymbols.push_back(d);
             } else {
-              deadSymbols.push_back(d);
+              info.deadSymbols.push_back(d);
             }
+            hasEmittedSymbol = true;
           }
-  parallelSort(liveSymbols.begin(), liveSymbols.end(),
+      }
+      if (hasEmittedSymbol)
+        info.files.push_back(file);
+    }
+  parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
                [](Defined *a, Defined *b) {
                  return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
                                                  : a->getName() < b->getName();
                });
   parallelSort(
-      deadSymbols.begin(), deadSymbols.end(),
+      info.deadSymbols.begin(), info.deadSymbols.end(),
       [](Defined *a, Defined *b) { return a->getName() < b->getName(); });
-  return {std::move(liveSymbols), std::move(deadSymbols)};
+  return info;
 }
 
 // Construct a map from symbols to their stringified representations.
@@ -128,16 +137,16 @@ void macho::writeMapFile() {
   os << format("# Arch: %s\n",
                getArchitectureName(config->arch()).str().c_str());
 
+  MapInfo info = gatherMapInfo();
+
   // Dump table of object files.
   os << "# Object files:\n";
   os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
   uint32_t fileIndex = 1;
   DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
-  for (InputFile *file : inputFiles) {
-    if (isa<ObjFile>(file)) {
-      os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
-      readerToFileOrdinal[file] = fileIndex++;
-    }
+  for (InputFile *file : info.files) {
+    os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
+    readerToFileOrdinal[file] = fileIndex++;
   }
 
   // Dump table of sections
@@ -153,13 +162,11 @@ void macho::writeMapFile() {
     }
 
   // Dump table of symbols
-  auto [liveSymbols, deadSymbols] = getSymbols();
-
   DenseMap<Symbol *, std::string> liveSymbolStrings =
-      getSymbolStrings(liveSymbols);
+      getSymbolStrings(info.liveSymbols);
   os << "# Symbols:\n";
   os << "# Address\tSize    \tFile  Name\n";
-  for (Defined *sym : liveSymbols) {
+  for (Defined *sym : info.liveSymbols) {
     assert(sym->isLive());
     os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
                  readerToFileOrdinal[sym->getFile()],
@@ -168,10 +175,10 @@ void macho::writeMapFile() {
 
   if (config->deadStrip) {
     DenseMap<Symbol *, std::string> deadSymbolStrings =
-        getSymbolStrings(deadSymbols);
+        getSymbolStrings(info.deadSymbols);
     os << "# Dead Stripped Symbols:\n";
     os << "#        \tSize    \tFile  Name\n";
-    for (Defined *sym : deadSymbols) {
+    for (Defined *sym : info.deadSymbols) {
       assert(!sym->isLive());
       os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
                    readerToFileOrdinal[sym->getFile()],

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index c2c62cd14c82..03631239ccba 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -108,6 +108,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
     } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
       overridesWeakDef = !isWeakDef && dysym->isWeakDef();
       dysym->unreference();
+    } else if (auto *undef = dyn_cast<Undefined>(s)) {
+      // Preserve the original bitcode file name (instead of using the object
+      // file name).
+      if (undef->wasBitcodeSymbol)
+        file = undef->getFile();
     }
     // Defined symbols take priority over other types of symbols, so in case
     // of a name conflict, we fall through to the replaceSymbol() call below.
@@ -142,7 +147,8 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
   RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
 
   if (wasInserted)
-    replaceSymbol<Undefined>(s, name, file, refState);
+    replaceSymbol<Undefined>(s, name, file, refState,
+                             /*wasBitcodeSymbol=*/false);
   else if (auto *lazy = dyn_cast<LazyArchive>(s))
     lazy->fetchArchiveMember();
   else if (isa<LazyObject>(s))

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index e7a4e4089a74..cb3b271a1912 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -105,6 +105,10 @@ uint64_t Defined::getVA() const {
   return isec->getVA(value);
 }
 
+ObjFile *Defined::getObjectFile() const {
+  return isec ? dyn_cast_or_null<ObjFile>(isec->getFile()) : nullptr;
+}
+
 void Defined::canonicalize() {
   if (unwindEntry)
     unwindEntry = unwindEntry->canonical();

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 61300e2902ff..6113f2d7b0ec 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -133,6 +133,10 @@ class Defined : public Symbol {
 
   uint64_t getVA() const override;
 
+  // Returns the object file that this symbol was defined in. This value 
diff ers
+  // from `getFile()` if the symbol originated from a bitcode file.
+  ObjFile *getObjectFile() const;
+
   std::string getSourceLocation();
 
   // Ensure this symbol's pointers to InputSections point to their canonical
@@ -199,8 +203,10 @@ enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
 
 class Undefined : public Symbol {
 public:
-  Undefined(StringRefZ name, InputFile *file, RefState refState)
-      : Symbol(UndefinedKind, name, file), refState(refState) {
+  Undefined(StringRefZ name, InputFile *file, RefState refState,
+            bool wasBitcodeSymbol)
+      : Symbol(UndefinedKind, name, file), refState(refState),
+        wasBitcodeSymbol(wasBitcodeSymbol) {
     assert(refState != RefState::Unreferenced);
   }
 
@@ -209,6 +215,7 @@ class Undefined : public Symbol {
   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
 
   RefState refState : 2;
+  bool wasBitcodeSymbol;
 };
 
 // On Unix, it is traditionally allowed to write variable definitions without

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index bf5f75c5f80e..33988dffd2ff 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1157,8 +1157,7 @@ void SymtabSection::emitStabs() {
       if (defined->wasIdenticalCodeFolded)
         continue;
 
-      InputSection *isec = defined->isec;
-      ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile());
+      ObjFile *file = defined->getObjectFile();
       if (!file || !file->compileUnit)
         continue;
 

diff  --git a/lld/test/MachO/map-file.ll b/lld/test/MachO/map-file.ll
new file mode 100644
index 000000000000..83824667612e
--- /dev/null
+++ b/lld/test/MachO/map-file.ll
@@ -0,0 +1,90 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+;; Verify that we map symbols to their original bitcode input files, not the
+;; intermediate object files. Also verify that we don't emit these intermediate
+;; object files if no symbol needs to reference them. (Intermediate object files
+;; may still be referenced if they contain compiler-synthesized symbols, such
+;; as outlined functions. TODO: Test this case.)
+
+; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o
+; RUN: opt -module-summary %t/bar.ll -o %t/bar.thinlto.o
+
+; RUN: %lld -dylib %t/foo.thinlto.o %t/bar.thinlto.o -o %t/foobar.thinlto -map \
+; RUN:   %t/foobar.thinlto.map
+; RUN: FileCheck %s --check-prefix=FOOBAR < %t/foobar.thinlto.map
+
+; RUN: %lld -dylib %t/bar.thinlto.o %t/foo.thinlto.o -o %t/barfoo.thinlto -map \
+; RUN:   %t/barfoo.thinlto.map
+; RUN: FileCheck %s --check-prefix=BARFOO < %t/barfoo.thinlto.map
+
+; RUN: llvm-as %t/foo.ll -o %t/foo.o
+; RUN: llvm-as %t/bar.ll -o %t/bar.o
+; RUN: %lld -dylib %t/foo.o %t/bar.o -o %t/foobar -map %t/foobar.map
+; RUN: FileCheck %s --check-prefix=LTO < %t/foobar.map
+
+; FOOBAR:      # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar.thinlto
+; FOOBAR-NEXT: # Arch: x86_64
+; FOOBAR-NEXT: # Object files:
+; FOOBAR-NEXT: [  0] linker synthesized
+; FOOBAR-NEXT: [  1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
+; FOOBAR-NEXT: [  2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
+; FOOBAR-NEXT: # Sections:
+; FOOBAR:      # Symbols:
+; FOOBAR-NEXT: # Address        Size             File  Name
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  1] _foo
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  2] _bar
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  2] _maybe_weak
+
+; BARFOO:      # Path: {{.*}}{{/|\\}}map-file.ll.tmp/barfoo.thinlto
+; BARFOO-NEXT: # Arch: x86_64
+; BARFOO-NEXT: # Object files:
+; BARFOO-NEXT: [  0] linker synthesized
+; BARFOO-NEXT: [  1] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
+; BARFOO-NEXT: [  2] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
+; BARFOO-NEXT: # Sections:
+; BARFOO:      # Symbols:
+; BARFOO-NEXT: # Address        Size             File  Name
+; BARFOO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  1] _bar
+; BARFOO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  1] _maybe_weak
+; BARFOO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  2] _foo
+
+; LTO:      # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar
+; LTO-NEXT: # Arch: x86_64
+; LTO-NEXT: # Object files:
+; LTO-NEXT: [  0] linker synthesized
+; LTO-NEXT: [  1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.o
+; LTO-NEXT: [  2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.o
+; LTO-NEXT: # Sections:
+; LTO:      # Symbols:
+; LTO-NEXT: # Address        Size             File   Name
+; LTO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  1]  _foo
+; LTO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  2]  _bar
+; LTO-NEXT: 0x{{[0-9A-F]+}}  0x{{[0-9A-F]+}}  [  2]  _maybe_weak
+
+;--- foo.ll
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() {
+  ret void
+}
+
+;; This is weak in foo.ll but non-weak in bar.ll, so the definition in bar.ll
+;; will always prevail. Check that the map file maps `maybe_weak` to bar.ll
+;; regardless of the object file input order.
+define weak_odr void @maybe_weak() {
+  ret void
+}
+
+;--- bar.ll
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @bar() {
+  ret void
+}
+
+define void @maybe_weak() {
+  ret void
+}


        


More information about the llvm-commits mailing list