[lld] 2b920ae - [lld] Add archive file support to Mach-O backend
Jez Ng via llvm-commits
llvm-commits at lists.llvm.org
Thu May 14 12:58:46 PDT 2020
Author: Kellie Medlin
Date: 2020-05-14T12:58:35-07:00
New Revision: 2b920ae78c1d3fd36aeb7e77ca8de18a36b92344
URL: https://github.com/llvm/llvm-project/commit/2b920ae78c1d3fd36aeb7e77ca8de18a36b92344
DIFF: https://github.com/llvm/llvm-project/commit/2b920ae78c1d3fd36aeb7e77ca8de18a36b92344.diff
LOG: [lld] Add archive file support to Mach-O backend
With this change, basic archive files can be linked together. Input
section discovery has been refactored into a function since archive
files lazily resolve their symbols / the object files containing those
symbols.
Reviewed By: int3, smeenai
Differential Revision: https://reviews.llvm.org/D78342
Added:
lld/test/MachO/archive.s
lld/test/MachO/invalid/archive-no-index.s
lld/test/MachO/invalid/bad-archive.s
lld/test/MachO/symbol-order.s
Modified:
lld/MachO/Driver.cpp
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/SymbolTable.cpp
lld/MachO/SymbolTable.h
lld/MachO/Symbols.cpp
lld/MachO/Symbols.h
Removed:
################################################################################
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 311e155a21a0..1867af4f5050 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +106,16 @@ static void addFile(StringRef path) {
MemoryBufferRef mbref = *buffer;
switch (identify_magic(mbref.getBuffer())) {
+ case file_magic::archive: {
+ std::unique_ptr<object::Archive> file = CHECK(
+ object::Archive::create(mbref), path + ": failed to parse archive");
+
+ if (!file->isEmpty() && !file->hasSymbolTable())
+ error(path + ": archive has no index; run ranlib to add one");
+
+ inputFiles.push_back(make<ArchiveFile>(std::move(file)));
+ break;
+ }
case file_magic::macho_object:
inputFiles.push_back(make<ObjFile>(mbref));
break;
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7bb070843696..c9a99c16e0b0 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -302,6 +302,30 @@ DylibFile *DylibFile::createLibSystemMock() {
return file;
}
+ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f)
+ : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+ for (const object::Archive::Symbol &sym : file->symbols())
+ symtab->addLazy(sym.getName(), this, sym);
+}
+// Loads the archive member that defines `sym`; each member is loaded at most once.
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+  object::Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member for symbol " +
+                                 sym.getName());
+  // `seen` records the offsets of members already loaded; skip repeat requests.
+  if (!seen.insert(c.getChildOffset()).second)
+    return;
+  // Parse the member as an object file. The local is `obj`, not `file`, so it does not shadow the member holding the archive.
+  MemoryBufferRef mb =
+      CHECK(c.getMemoryBufferRef(),
+            toString(this) +
+                ": could not get the buffer for the member defining symbol " +
+                sym.getName());
+  auto *obj = make<ObjFile>(mb);
+  sections.insert(sections.end(), obj->sections.begin(), obj->sections.end());
+}
+
// Returns "<internal>" or "baz.o".
std::string lld::toString(const InputFile *file) {
return file ? std::string(file->getName()) : "<internal>";
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 6d760d4c3b50..c94035b1bcf6 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -28,6 +28,7 @@ class InputFile {
enum Kind {
ObjKind,
DylibKind,
+ ArchiveKind,
};
virtual ~InputFile() = default;
@@ -81,6 +82,20 @@ class DylibFile : public InputFile {
std::vector<DylibFile *> reexported;
};
+// .a file
+class ArchiveFile : public InputFile {
+public:
+ explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+ static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
+ void fetch(const llvm::object::Archive::Symbol &sym);
+
+private:
+ std::unique_ptr<llvm::object::Archive> file;
+ // Keep track of children fetched from the archive by tracking
+ // which address offsets have been fetched already.
+ llvm::DenseSet<uint64_t> seen;
+};
+
extern std::vector<InputFile *> inputFiles;
llvm::Optional<MemoryBufferRef> readFile(StringRef path);
diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 6e1d9771c965..80e870d79890 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -56,6 +56,8 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
if (wasInserted)
replaceSymbol<Undefined>(s, name);
+ else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
+ lazy->fetchArchiveMember();
return s;
}
@@ -69,4 +71,17 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file) {
return s;
}
+Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
+ const llvm::object::Archive::Symbol &sym) {
+ Symbol *s;
+ bool wasInserted;
+ std::tie(s, wasInserted) = insert(name);
+
+ if (wasInserted)
+ replaceSymbol<LazySymbol>(s, file, sym);
+ else if (isa<Undefined>(s))
+ file->fetch(sym);
+ return s;
+}
+
SymbolTable *macho::symtab;
diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index b4b10c684a81..2379008db56d 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -30,6 +30,9 @@ class SymbolTable {
Symbol *addDylib(StringRef name, DylibFile *file);
+ Symbol *addLazy(StringRef name, ArchiveFile *file,
+ const llvm::object::Archive::Symbol &sym);
+
ArrayRef<Symbol *> getSymbols() const { return symVector; }
Symbol *find(StringRef name);
diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index ec383ec9781c..9813e4fd9ade 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -15,6 +15,8 @@ using namespace llvm;
using namespace lld;
using namespace lld::macho;
+void LazySymbol::fetchArchiveMember() { file->fetch(sym); }
+
// Returns a symbol for an error message.
std::string lld::toString(const Symbol &sym) {
if (Optional<std::string> s = demangleItanium(sym.getName()))
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 7b35b4530a95..5b9476984a4b 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -35,6 +35,7 @@ class Symbol {
DefinedKind,
UndefinedKind,
DylibKind,
+ LazyKind,
};
Kind kind() const { return static_cast<Kind>(symbolKind); }
@@ -81,6 +82,20 @@ class DylibSymbol : public Symbol {
uint32_t lazyBindOffset = UINT32_MAX;
};
+class LazySymbol : public Symbol {
+public:
+ LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
+ : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
+
+ static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
+
+ void fetchArchiveMember();
+
+private:
+ ArchiveFile *file;
+ const llvm::object::Archive::Symbol sym;
+};
+
inline uint64_t Symbol::getVA() const {
if (auto *d = dyn_cast<Defined>(this))
return d->isec->getVA() + d->value;
@@ -91,6 +106,7 @@ union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
+ alignas(LazySymbol) char d[sizeof(LazySymbol)];
};
template <typename T, typename... ArgT>
diff --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s
new file mode 100644
index 000000000000..370980768faa
--- /dev/null
+++ b/lld/test/MachO/archive.s
@@ -0,0 +1,35 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o
+# RUN: lld -flavor darwinnew %t/main.o %t/test.a -o %t/test.out
+
+## TODO: Run llvm-nm -p to validate symbol order
+# RUN: llvm-nm %t/test.out | FileCheck %s
+# CHECK: T _bar
+# CHECK: T _boo
+# CHECK: T _main
+
+## Linking with the archive first in the command line shouldn't change anything
+# RUN: lld -flavor darwinnew %t/test.a %t/main.o -o %t/test.out
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: T _bar
+# ARCHIVE-FIRST: T _boo
+# ARCHIVE-FIRST: T _main
+
+
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE
+# VISIBLE-NOT: T _undefined
+# VISIBLE-NOT: T _unused
+
+.global _main
+_main:
+ callq _boo
+ callq _bar
+ mov $0, %rax
+ ret
diff --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
new file mode 100644
index 000000000000..0f2f023e83c1
--- /dev/null
+++ b/lld/test/MachO/invalid/archive-no-index.s
@@ -0,0 +1,17 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+## The `S` modifier tells llvm-ar not to build a symbol table, so test.a has no index.
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
+## Link the object assembled above (main.o) so the link fails on the missing index, not on a missing input file.
+# RUN: not lld -flavor darwinnew %t/main.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
+# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
+
+.global _main
+_main:
+  mov $0, %rax
+  ret
diff --git a/lld/test/MachO/invalid/bad-archive.s b/lld/test/MachO/invalid/bad-archive.s
new file mode 100644
index 000000000000..9429dc3ec311
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-archive.s
@@ -0,0 +1,11 @@
+# REQUIRES: x86
+# RUN: echo "!<arch>" > %t.a
+# RUN: echo "foo" >> %t.a
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+
+# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck -DFILE=%t.a %s
+# CHECK: error: [[FILE]]: failed to parse archive: truncated or malformed archive (remaining size of archive too small for next archive member header at offset 8)
+
+.global _main
+_main:
+ ret
diff --git a/lld/test/MachO/symbol-order.s b/lld/test/MachO/symbol-order.s
new file mode 100644
index 000000000000..e65663262285
--- /dev/null
+++ b/lld/test/MachO/symbol-order.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global f, g; .section __TEXT,test_g; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o
+# RUN: echo ".global f; .section __TEXT,test_f1; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o
+# RUN: echo ".global f; .section __TEXT,test_f2; f: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o
+# RUN: echo ".global f, g; .section __TEXT,test_fg; f: ret; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -dylib -o %t/libf1.dylib %t/f1.o
+
+# RUN: rm -f %t/libf2_g.a
+# RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o
+
+# RUN: rm -f %t/libfg.a
+# RUN: llvm-ar rcs %t/libfg.a %t/fg.o
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix DYLIB-FIRST
+# DYLIB-FIRST: SYMBOL TABLE:
+# DYLIB-FIRST-DAG: __TEXT,test_g g
+# DYLIB-FIRST: Lazy bind table:
+# DYLIB-FIRST-NEXT: segment section address dylib symbol
+# DYLIB-FIRST-NEXT: __DATA __la_symbol_ptr {{[0-9a-z]+}} libf1 f
+
+# RUN: lld -flavor darwinnew %t/libf2_g.a %t/libf1.dylib %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: SYMBOL TABLE:
+# ARCHIVE-FIRST-DAG: __TEXT,test_f2 f
+# ARCHIVE-FIRST-DAG: __TEXT,test_g g
+# ARCHIVE-FIRST: Lazy bind table:
+# ARCHIVE-FIRST-NEXT: segment section address dylib symbol
+# ARCHIVE-FIRST-EMPTY:
+
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libfg.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-PRIORITY
+# ARCHIVE-PRIORITY: SYMBOL TABLE:
+# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg f
+# ARCHIVE-PRIORITY-DAG: __TEXT,test_fg g
+# ARCHIVE-PRIORITY: Lazy bind table:
+# ARCHIVE-PRIORITY-NEXT: segment section address dylib symbol
+# ARCHIVE-PRIORITY-EMPTY:
+
+.global g
+.global _main
+_main:
+ callq g
+ ret
More information about the llvm-commits
mailing list