[lld] b945733 - [lld-macho] Map file should map symbols to their original bitcode file
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 21 19:49:14 PDT 2022
Author: Jez Ng
Date: 2022-10-21T22:49:02-04:00
New Revision: b9457330266e12364e8949939f899135c41d88b3
URL: https://github.com/llvm/llvm-project/commit/b9457330266e12364e8949939f899135c41d88b3
DIFF: https://github.com/llvm/llvm-project/commit/b9457330266e12364e8949939f899135c41d88b3.diff
LOG: [lld-macho] Map file should map symbols to their original bitcode file
... instead of mapping them to the intermediate object file.
This matches ld64.
Reviewed By: #lld-macho, Roger
Differential Revision: https://reviews.llvm.org/D136380
Added:
lld/test/MachO/map-file.ll
Modified:
lld/MachO/LTO.cpp
lld/MachO/MapFile.cpp
lld/MachO/SymbolTable.cpp
lld/MachO/Symbols.cpp
lld/MachO/Symbols.h
lld/MachO/SyntheticSections.cpp
Removed:
################################################################################
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index f465c41aec6c..d698e38611d6 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -108,7 +108,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// load the ObjFile emitted by LTO compilation.
if (r.Prevailing)
replaceSymbol<Undefined>(sym, sym->getName(), sym->getFile(),
- RefState::Strong);
+ RefState::Strong, /*wasBitcodeSymbol=*/true);
// TODO: set the other resolution configs properly
}
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 41f72ebcb331..0a8053cd7871 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -45,32 +45,41 @@ using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
-using Symbols = std::vector<Defined *>;
-// Returns a pair where the left element is a container of all live Symbols and
-// the right element is a container of all dead symbols.
-static std::pair<Symbols, Symbols> getSymbols() {
- Symbols liveSymbols, deadSymbols;
+struct MapInfo {
+ SmallVector<InputFile *> files;
+ SmallVector<Defined *> liveSymbols;
+ SmallVector<Defined *> deadSymbols;
+};
+
+static MapInfo gatherMapInfo() {
+ MapInfo info;
for (InputFile *file : inputFiles)
- if (isa<ObjFile>(file))
- for (Symbol *sym : file->symbols)
+ if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) {
+ bool hasEmittedSymbol = false;
+ for (Symbol *sym : file->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym))
if (d->isec && d->getFile() == file) {
if (d->isLive()) {
assert(!shouldOmitFromOutput(d->isec));
- liveSymbols.push_back(d);
+ info.liveSymbols.push_back(d);
} else {
- deadSymbols.push_back(d);
+ info.deadSymbols.push_back(d);
}
+ hasEmittedSymbol = true;
}
- parallelSort(liveSymbols.begin(), liveSymbols.end(),
+ }
+ if (hasEmittedSymbol)
+ info.files.push_back(file);
+ }
+ parallelSort(info.liveSymbols.begin(), info.liveSymbols.end(),
[](Defined *a, Defined *b) {
return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
: a->getName() < b->getName();
});
parallelSort(
- deadSymbols.begin(), deadSymbols.end(),
+ info.deadSymbols.begin(), info.deadSymbols.end(),
[](Defined *a, Defined *b) { return a->getName() < b->getName(); });
- return {std::move(liveSymbols), std::move(deadSymbols)};
+ return info;
}
// Construct a map from symbols to their stringified representations.
@@ -128,16 +137,16 @@ void macho::writeMapFile() {
os << format("# Arch: %s\n",
getArchitectureName(config->arch()).str().c_str());
+ MapInfo info = gatherMapInfo();
+
// Dump table of object files.
os << "# Object files:\n";
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
uint32_t fileIndex = 1;
DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
- for (InputFile *file : inputFiles) {
- if (isa<ObjFile>(file)) {
- os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
- readerToFileOrdinal[file] = fileIndex++;
- }
+ for (InputFile *file : info.files) {
+ os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
+ readerToFileOrdinal[file] = fileIndex++;
}
// Dump table of sections
@@ -153,13 +162,11 @@ void macho::writeMapFile() {
}
// Dump table of symbols
- auto [liveSymbols, deadSymbols] = getSymbols();
-
DenseMap<Symbol *, std::string> liveSymbolStrings =
- getSymbolStrings(liveSymbols);
+ getSymbolStrings(info.liveSymbols);
os << "# Symbols:\n";
os << "# Address\tSize \tFile Name\n";
- for (Defined *sym : liveSymbols) {
+ for (Defined *sym : info.liveSymbols) {
assert(sym->isLive());
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), sym->size,
readerToFileOrdinal[sym->getFile()],
@@ -168,10 +175,10 @@ void macho::writeMapFile() {
if (config->deadStrip) {
DenseMap<Symbol *, std::string> deadSymbolStrings =
- getSymbolStrings(deadSymbols);
+ getSymbolStrings(info.deadSymbols);
os << "# Dead Stripped Symbols:\n";
os << "# \tSize \tFile Name\n";
- for (Defined *sym : deadSymbols) {
+ for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
readerToFileOrdinal[sym->getFile()],
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index c2c62cd14c82..03631239ccba 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -108,6 +108,11 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
} else if (auto *dysym = dyn_cast<DylibSymbol>(s)) {
overridesWeakDef = !isWeakDef && dysym->isWeakDef();
dysym->unreference();
+ } else if (auto *undef = dyn_cast<Undefined>(s)) {
+ // Preserve the original bitcode file name (instead of using the object
+ // file name).
+ if (undef->wasBitcodeSymbol)
+ file = undef->getFile();
}
// Defined symbols take priority over other types of symbols, so in case
// of a name conflict, we fall through to the replaceSymbol() call below.
@@ -142,7 +147,8 @@ Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file,
RefState refState = isWeakRef ? RefState::Weak : RefState::Strong;
if (wasInserted)
- replaceSymbol<Undefined>(s, name, file, refState);
+ replaceSymbol<Undefined>(s, name, file, refState,
+ /*wasBitcodeSymbol=*/false);
else if (auto *lazy = dyn_cast<LazyArchive>(s))
lazy->fetchArchiveMember();
else if (isa<LazyObject>(s))
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index e7a4e4089a74..cb3b271a1912 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -105,6 +105,10 @@ uint64_t Defined::getVA() const {
return isec->getVA(value);
}
+ObjFile *Defined::getObjectFile() const {
+ return isec ? dyn_cast_or_null<ObjFile>(isec->getFile()) : nullptr;
+}
+
void Defined::canonicalize() {
if (unwindEntry)
unwindEntry = unwindEntry->canonical();
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 61300e2902ff..6113f2d7b0ec 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -133,6 +133,10 @@ class Defined : public Symbol {
uint64_t getVA() const override;
+ // Returns the object file that this symbol was defined in. This value
diff ers
+ // from `getFile()` if the symbol originated from a bitcode file.
+ ObjFile *getObjectFile() const;
+
std::string getSourceLocation();
// Ensure this symbol's pointers to InputSections point to their canonical
@@ -199,8 +203,10 @@ enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };
class Undefined : public Symbol {
public:
- Undefined(StringRefZ name, InputFile *file, RefState refState)
- : Symbol(UndefinedKind, name, file), refState(refState) {
+ Undefined(StringRefZ name, InputFile *file, RefState refState,
+ bool wasBitcodeSymbol)
+ : Symbol(UndefinedKind, name, file), refState(refState),
+ wasBitcodeSymbol(wasBitcodeSymbol) {
assert(refState != RefState::Unreferenced);
}
@@ -209,6 +215,7 @@ class Undefined : public Symbol {
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
RefState refState : 2;
+ bool wasBitcodeSymbol;
};
// On Unix, it is traditionally allowed to write variable definitions without
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index bf5f75c5f80e..33988dffd2ff 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1157,8 +1157,7 @@ void SymtabSection::emitStabs() {
if (defined->wasIdenticalCodeFolded)
continue;
- InputSection *isec = defined->isec;
- ObjFile *file = dyn_cast_or_null<ObjFile>(isec->getFile());
+ ObjFile *file = defined->getObjectFile();
if (!file || !file->compileUnit)
continue;
diff --git a/lld/test/MachO/map-file.ll b/lld/test/MachO/map-file.ll
new file mode 100644
index 000000000000..83824667612e
--- /dev/null
+++ b/lld/test/MachO/map-file.ll
@@ -0,0 +1,90 @@
+; REQUIRES: x86
+; RUN: rm -rf %t; split-file %s %t
+
+;; Verify that we map symbols to their original bitcode input files, not the
+;; intermediate object files. Also verify that we don't emit these intermediate
+;; object files if no symbol needs to reference them. (Intermediate object files
+;; may still be referenced if they contain compiler-synthesized symbols, such
+;; as outlined functions. TODO: Test this case.)
+
+; RUN: opt -module-summary %t/foo.ll -o %t/foo.thinlto.o
+; RUN: opt -module-summary %t/bar.ll -o %t/bar.thinlto.o
+
+; RUN: %lld -dylib %t/foo.thinlto.o %t/bar.thinlto.o -o %t/foobar.thinlto -map \
+; RUN: %t/foobar.thinlto.map
+; RUN: FileCheck %s --check-prefix=FOOBAR < %t/foobar.thinlto.map
+
+; RUN: %lld -dylib %t/bar.thinlto.o %t/foo.thinlto.o -o %t/barfoo.thinlto -map \
+; RUN: %t/barfoo.thinlto.map
+; RUN: FileCheck %s --check-prefix=BARFOO < %t/barfoo.thinlto.map
+
+; RUN: llvm-as %t/foo.ll -o %t/foo.o
+; RUN: llvm-as %t/bar.ll -o %t/bar.o
+; RUN: %lld -dylib %t/foo.o %t/bar.o -o %t/foobar -map %t/foobar.map
+; RUN: FileCheck %s --check-prefix=LTO < %t/foobar.map
+
+; FOOBAR: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar.thinlto
+; FOOBAR-NEXT: # Arch: x86_64
+; FOOBAR-NEXT: # Object files:
+; FOOBAR-NEXT: [ 0] linker synthesized
+; FOOBAR-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
+; FOOBAR-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
+; FOOBAR-NEXT: # Sections:
+; FOOBAR: # Symbols:
+; FOOBAR-NEXT: # Address Size File Name
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar
+; FOOBAR-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak
+
+; BARFOO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/barfoo.thinlto
+; BARFOO-NEXT: # Arch: x86_64
+; BARFOO-NEXT: # Object files:
+; BARFOO-NEXT: [ 0] linker synthesized
+; BARFOO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/bar.thinlto.o
+; BARFOO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/foo.thinlto.o
+; BARFOO-NEXT: # Sections:
+; BARFOO: # Symbols:
+; BARFOO-NEXT: # Address Size File Name
+; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _bar
+; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _maybe_weak
+; BARFOO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _foo
+
+; LTO: # Path: {{.*}}{{/|\\}}map-file.ll.tmp/foobar
+; LTO-NEXT: # Arch: x86_64
+; LTO-NEXT: # Object files:
+; LTO-NEXT: [ 0] linker synthesized
+; LTO-NEXT: [ 1] {{.*}}{{/|\\}}map-file.ll.tmp/foo.o
+; LTO-NEXT: [ 2] {{.*}}{{/|\\}}map-file.ll.tmp/bar.o
+; LTO-NEXT: # Sections:
+; LTO: # Symbols:
+; LTO-NEXT: # Address Size File Name
+; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 1] _foo
+; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _bar
+; LTO-NEXT: 0x{{[0-9A-F]+}} 0x{{[0-9A-F]+}} [ 2] _maybe_weak
+
+;--- foo.ll
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() {
+ ret void
+}
+
+;; This is weak in foo.ll but non-weak in bar.ll, so the definition in bar.ll
+;; will always prevail. Check that the map file maps `maybe_weak` to bar.ll
+;; regardless of the object file input order.
+define weak_odr void @maybe_weak() {
+ ret void
+}
+
+;--- bar.ll
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @bar() {
+ ret void
+}
+
+define void @maybe_weak() {
+ ret void
+}
More information about the llvm-commits
mailing list