[lld] 2b920ae - [lld] Add archive file support to Mach-O backend

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu May 14 12:58:46 PDT 2020


Author: Kellie Medlin
Date: 2020-05-14T12:58:35-07:00
New Revision: 2b920ae78c1d3fd36aeb7e77ca8de18a36b92344

URL: https://github.com/llvm/llvm-project/commit/2b920ae78c1d3fd36aeb7e77ca8de18a36b92344
DIFF: https://github.com/llvm/llvm-project/commit/2b920ae78c1d3fd36aeb7e77ca8de18a36b92344.diff

LOG: [lld] Add archive file support to Mach-O backend

With this change, basic archive files can be linked together. Input
section discovery has been refactored into a function since archive
files lazily resolve their symbols / the object files containing those
symbols.

Reviewed By: int3, smeenai

Differential Revision: https://reviews.llvm.org/D78342

Added: 
    lld/test/MachO/archive.s
    lld/test/MachO/invalid/archive-no-index.s
    lld/test/MachO/invalid/bad-archive.s
    lld/test/MachO/symbol-order.s

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/SymbolTable.cpp
    lld/MachO/SymbolTable.h
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 311e155a21a0..1867af4f5050 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -105,6 +106,16 @@ static void addFile(StringRef path) {
   MemoryBufferRef mbref = *buffer;
 
   switch (identify_magic(mbref.getBuffer())) {
+  case file_magic::archive: {
+    std::unique_ptr<object::Archive> file = CHECK(
+        object::Archive::create(mbref), path + ": failed to parse archive");
+
+    if (!file->isEmpty() && !file->hasSymbolTable())
+      error(path + ": archive has no index; run ranlib to add one");
+
+    inputFiles.push_back(make<ArchiveFile>(std::move(file)));
+    break;
+  }
   case file_magic::macho_object:
     inputFiles.push_back(make<ObjFile>(mbref));
     break;

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7bb070843696..c9a99c16e0b0 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -302,6 +302,30 @@ DylibFile *DylibFile::createLibSystemMock() {
   return file;
 }
 
+ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f)
+    : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) {
+  for (const object::Archive::Symbol &sym : file->symbols())
+    symtab->addLazy(sym.getName(), this, sym); // May call fetch() right away.
+}
+
+void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
+  object::Archive::Child c =
+      CHECK(sym.getMember(), toString(this) +
+                                 ": could not get the member for symbol " +
+                                 sym.getName());
+  // Members are keyed by their offset in the archive; load each only once.
+  if (!seen.insert(c.getChildOffset()).second)
+    return;
+  // Wrap the member in an ObjFile and append its sections to this file's.
+  MemoryBufferRef mb =
+      CHECK(c.getMemoryBufferRef(),
+            toString(this) +
+                ": could not get the buffer for the member defining symbol " +
+                sym.getName());
+  auto file = make<ObjFile>(mb); // NOTE: shadows the member `file`.
+  sections.insert(sections.end(), file->sections.begin(), file->sections.end());
+}
+
 // Returns "<internal>" or "baz.o".
 std::string lld::toString(const InputFile *file) {
   return file ? std::string(file->getName()) : "<internal>";

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 6d760d4c3b50..c94035b1bcf6 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -28,6 +28,7 @@ class InputFile {
   enum Kind {
     ObjKind,
     DylibKind,
+    ArchiveKind,
   };
 
   virtual ~InputFile() = default;
@@ -81,6 +82,20 @@ class DylibFile : public InputFile {
   std::vector<DylibFile *> reexported;
 };
 
+// A static archive (.a file). Members are loaded on demand through fetch().
+class ArchiveFile : public InputFile {
+public:
+  explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file);
+  static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
+  void fetch(const llvm::object::Archive::Symbol &sym);
+
+private:
+  std::unique_ptr<llvm::object::Archive> file;
+  // Offsets of the archive members that fetch() has already loaded; used to
+  // make sure that each member is loaded at most once.
+  llvm::DenseSet<uint64_t> seen;
+};
+
 extern std::vector<InputFile *> inputFiles;
 
 llvm::Optional<MemoryBufferRef> readFile(StringRef path);

diff  --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 6e1d9771c965..80e870d79890 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -56,6 +56,8 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
 
   if (wasInserted)
     replaceSymbol<Undefined>(s, name);
+  else if (LazySymbol *lazy = dyn_cast<LazySymbol>(s))
+    lazy->fetchArchiveMember();
   return s;
 }
 
@@ -69,4 +71,17 @@ Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file) {
   return s;
 }
 
+Symbol *SymbolTable::addLazy(StringRef name, ArchiveFile *file,
+                             const llvm::object::Archive::Symbol &sym) {
+  Symbol *s;
+  bool wasInserted;
+  std::tie(s, wasInserted) = insert(name);
+  // A new name is recorded as lazy; an already-undefined one is loaded now.
+  if (wasInserted)
+    replaceSymbol<LazySymbol>(s, file, sym);
+  else if (isa<Undefined>(s))
+    file->fetch(sym);
+  return s;
+}
+
 SymbolTable *macho::symtab;

diff  --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h
index b4b10c684a81..2379008db56d 100644
--- a/lld/MachO/SymbolTable.h
+++ b/lld/MachO/SymbolTable.h
@@ -30,6 +30,9 @@ class SymbolTable {
 
   Symbol *addDylib(StringRef name, DylibFile *file);
 
+  Symbol *addLazy(StringRef name, ArchiveFile *file,
+                  const llvm::object::Archive::Symbol &sym);
+
   ArrayRef<Symbol *> getSymbols() const { return symVector; }
   Symbol *find(StringRef name);
 

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index ec383ec9781c..9813e4fd9ade 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -15,6 +15,8 @@ using namespace llvm;
 using namespace lld;
 using namespace lld::macho;
 
+void LazySymbol::fetchArchiveMember() { file->fetch(sym); }
+
 // Returns a symbol for an error message.
 std::string lld::toString(const Symbol &sym) {
   if (Optional<std::string> s = demangleItanium(sym.getName()))

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 7b35b4530a95..5b9476984a4b 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -35,6 +35,7 @@ class Symbol {
     DefinedKind,
     UndefinedKind,
     DylibKind,
+    LazyKind,
   };
 
   Kind kind() const { return static_cast<Kind>(symbolKind); }
@@ -81,6 +82,20 @@ class DylibSymbol : public Symbol {
   uint32_t lazyBindOffset = UINT32_MAX;
 };
 
+class LazySymbol : public Symbol {
+public:
+  LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)
+      : Symbol(LazyKind, sym.getName()), file(file), sym(sym) {}
+
+  static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
+  // Asks the owning archive to load the member that provides this symbol.
+  void fetchArchiveMember();
+
+private:
+  ArchiveFile *file;
+  const llvm::object::Archive::Symbol sym;
+};
+
 inline uint64_t Symbol::getVA() const {
   if (auto *d = dyn_cast<Defined>(this))
     return d->isec->getVA() + d->value;
@@ -91,6 +106,7 @@ union SymbolUnion {
   alignas(Defined) char a[sizeof(Defined)];
   alignas(Undefined) char b[sizeof(Undefined)];
   alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
+  alignas(LazySymbol) char d[sizeof(LazySymbol)];
 };
 
 template <typename T, typename... ArgT>

diff  --git a/lld/test/MachO/archive.s b/lld/test/MachO/archive.s
new file mode 100644
index 000000000000..370980768faa
--- /dev/null
+++ b/lld/test/MachO/archive.s
@@ -0,0 +1,35 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret"                           | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret"                           | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o
+# RUN: lld -flavor darwinnew %t/main.o %t/test.a -o %t/test.out
+
+## TODO: Run llvm-nm -p to validate symbol order
+# RUN: llvm-nm %t/test.out | FileCheck %s
+# CHECK: T _bar
+# CHECK: T _boo
+# CHECK: T _main
+
+## Linking with the archive first in the command line shouldn't change anything
+# RUN: lld -flavor darwinnew %t/test.a %t/main.o -o %t/test.out
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST: T _bar
+# ARCHIVE-FIRST: T _boo
+# ARCHIVE-FIRST: T _main
+
+## 4.o is never referenced, so none of its symbols may end up in the output.
+# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE
+# VISIBLE-NOT: T _undefined
+# VISIBLE-NOT: T _unused
+
+.global _main
+_main:
+  callq _boo
+  callq _bar
+  mov $0, %rax
+  ret

diff  --git a/lld/test/MachO/invalid/archive-no-index.s b/lld/test/MachO/invalid/archive-no-index.s
new file mode 100644
index 000000000000..0f2f023e83c1
--- /dev/null
+++ b/lld/test/MachO/invalid/archive-no-index.s
@@ -0,0 +1,17 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global _boo; _boo: ret"                           | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/2.o
+# RUN: echo ".global _bar; _bar: ret"                           | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/3.o
+# RUN: echo ".global _undefined; .global _unused; _unused: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+
+# RUN: rm -f %t/test.a
+# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
+## "S" omits the archive index; link the real main.o so that is the only error.
+# RUN: not lld -flavor darwinnew %t/main.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
+# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
+
+.global _main
+_main:
+  mov $0, %rax
+  ret

diff  --git a/lld/test/MachO/invalid/bad-archive.s b/lld/test/MachO/invalid/bad-archive.s
new file mode 100644
index 000000000000..9429dc3ec311
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-archive.s
@@ -0,0 +1,11 @@
+# REQUIRES: x86
+# RUN: echo "!<arch>" > %t.a
+# RUN: echo "foo" >> %t.a
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+## The archive magic followed by garbage must be reported as a parse failure.
+# RUN: not lld -flavor darwinnew %t.o %t.a -o /dev/null 2>&1 | FileCheck -DFILE=%t.a %s
+# CHECK: error: [[FILE]]: failed to parse archive: truncated or malformed archive (remaining size of archive too small for next archive member header at offset 8)
+
+.global _main
+_main:
+  ret

diff  --git a/lld/test/MachO/symbol-order.s b/lld/test/MachO/symbol-order.s
new file mode 100644
index 000000000000..e65663262285
--- /dev/null
+++ b/lld/test/MachO/symbol-order.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: echo ".global f, g; .section __TEXT,test_g; g: callq f"          | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/g.o
+# RUN: echo ".global f; .section __TEXT,test_f1; f: ret"                | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f1.o
+# RUN: echo ".global f; .section __TEXT,test_f2; f: ret"                | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/f2.o
+# RUN: echo ".global f, g; .section __TEXT,test_fg; f: ret; g: callq f" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/fg.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -dylib -o %t/libf1.dylib %t/f1.o
+
+# RUN: rm -f %t/libf2_g.a
+# RUN: llvm-ar rcs %t/libf2_g.a %t/f2.o %t/g.o
+
+# RUN: rm -f %t/libfg.a
+# RUN: llvm-ar rcs %t/libfg.a %t/fg.o
+## Dylib first: f is bound to libf1.dylib, but g still comes from the archive.
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libf2_g.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix DYLIB-FIRST
+# DYLIB-FIRST:      SYMBOL TABLE:
+# DYLIB-FIRST-DAG:  __TEXT,test_g g
+# DYLIB-FIRST:      Lazy bind table:
+# DYLIB-FIRST-NEXT: segment  section            address       dylib            symbol
+# DYLIB-FIRST-NEXT: __DATA   __la_symbol_ptr    {{[0-9a-z]+}} libf1            f
+## Archive first: f2.o from the archive defines f; nothing binds to the dylib.
+# RUN: lld -flavor darwinnew %t/libf2_g.a %t/libf1.dylib %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST
+# ARCHIVE-FIRST:      SYMBOL TABLE:
+# ARCHIVE-FIRST-DAG:  __TEXT,test_f2 f
+# ARCHIVE-FIRST-DAG:  __TEXT,test_g g
+# ARCHIVE-FIRST:      Lazy bind table:
+# ARCHIVE-FIRST-NEXT: segment  section            address       dylib            symbol
+# ARCHIVE-FIRST-EMPTY:
+## fg.o is pulled in for g; its f is expected to win over the dylib's f.
+# RUN: lld -flavor darwinnew %t/libf1.dylib %t/libfg.a %t/test.o -o %t/test.out
+# RUN: llvm-objdump --syms --macho --lazy-bind %t/test.out | FileCheck %s --check-prefix ARCHIVE-PRIORITY
+# ARCHIVE-PRIORITY:      SYMBOL TABLE:
+# ARCHIVE-PRIORITY-DAG:  __TEXT,test_fg f
+# ARCHIVE-PRIORITY-DAG:  __TEXT,test_fg g
+# ARCHIVE-PRIORITY:      Lazy bind table:
+# ARCHIVE-PRIORITY-NEXT: segment  section            address       dylib            symbol
+# ARCHIVE-PRIORITY-EMPTY:
+
+.global g
+.global _main
+_main:
+  callq g
+  ret


        


More information about the llvm-commits mailing list