[lld] a12e7d4 - [lld-macho] Handle GOT relocations of non-dylib symbols

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 17 20:41:56 PDT 2020


Author: Jez Ng
Date: 2020-06-17T20:41:28-07:00
New Revision: a12e7d406de2c31633414e5c73881869c930d194

URL: https://github.com/llvm/llvm-project/commit/a12e7d406de2c31633414e5c73881869c930d194
DIFF: https://github.com/llvm/llvm-project/commit/a12e7d406de2c31633414e5c73881869c930d194.diff

LOG: [lld-macho] Handle GOT relocations of non-dylib symbols

Summary:
Turns out this case is actually really common -- it happens whenever there's
a reference to an `extern` variable that ends up statically linked.

Depends on D80856.

Reviewers: ruiu, pcc, MaskRay, smeenai, alexshap, gkm, Ktwu, christylee

Reviewed By: smeenai

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80857

Added: 
    lld/test/MachO/local-got.s

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Target.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 6353cc241014..3ce65ad4e22f 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -34,8 +34,8 @@ struct X86_64 : TargetInfo {
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
 
-  void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) override;
-  uint64_t getDylibSymbolVA(const DylibSymbol &, uint8_t type) const override;
+  void prepareSymbolRelocation(lld::macho::Symbol &, uint8_t type) override;
+  uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
 };
 
 } // namespace
@@ -208,30 +208,54 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                    in.stubHelper->addr);
 }
 
-void X86_64::prepareDylibSymbolRelocation(DylibSymbol &sym, uint8_t type) {
+void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym, uint8_t type) {
   switch (type) {
   case X86_64_RELOC_GOT_LOAD:
     // TODO: implement mov -> lea relaxation for non-dynamic symbols
   case X86_64_RELOC_GOT:
     in.got->addEntry(sym);
     break;
-  case X86_64_RELOC_BRANCH:
-    in.stubs->addEntry(sym);
+  case X86_64_RELOC_BRANCH: {
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+      in.stubs->addEntry(*dysym);
+    break;
+  }
+  case X86_64_RELOC_UNSIGNED:
+  case X86_64_RELOC_SIGNED:
+  case X86_64_RELOC_SIGNED_1:
+  case X86_64_RELOC_SIGNED_2:
+  case X86_64_RELOC_SIGNED_4:
+    break;
+  case X86_64_RELOC_SUBTRACTOR:
+  case X86_64_RELOC_TLV:
+    fatal("TODO: handle relocation type " + std::to_string(type));
     break;
   default:
-    llvm_unreachable("Unexpected dylib relocation type");
+    llvm_unreachable("unexpected relocation type");
   }
 }
 
-uint64_t X86_64::getDylibSymbolVA(const DylibSymbol &sym, uint8_t type) const {
+uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym,
+                             uint8_t type) const {
   switch (type) {
   case X86_64_RELOC_GOT_LOAD:
   case X86_64_RELOC_GOT:
     return in.got->addr + sym.gotIndex * WordSize;
   case X86_64_RELOC_BRANCH:
-    return in.stubs->addr + sym.stubsIndex * sizeof(stub);
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+      return in.stubs->addr + dysym->stubsIndex * sizeof(stub);
+    return sym.getVA();
+  case X86_64_RELOC_UNSIGNED:
+  case X86_64_RELOC_SIGNED:
+  case X86_64_RELOC_SIGNED_1:
+  case X86_64_RELOC_SIGNED_2:
+  case X86_64_RELOC_SIGNED_4:
+    return sym.getVA();
+  case X86_64_RELOC_SUBTRACTOR:
+  case X86_64_RELOC_TLV:
+    fatal("TODO: handle relocation type " + std::to_string(type));
   default:
-    llvm_unreachable("Unexpected dylib relocation type");
+    llvm_unreachable("Unexpected relocation type");
   }
 }
 

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 3dce2aeb0139..327fd419b613 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -33,15 +33,10 @@ void InputSection::writeTo(uint8_t *buf) {
 
   for (Reloc &r : relocs) {
     uint64_t va = 0;
-    if (auto *s = r.target.dyn_cast<Symbol *>()) {
-      if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s)) {
-        va = target->getDylibSymbolVA(*dylibSymbol, r.type);
-      } else {
-        va = s->getVA();
-      }
-    } else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
+    if (auto *s = r.target.dyn_cast<Symbol *>())
+      va = target->getSymbolVA(*s, r.type);
+    else if (auto *isec = r.target.dyn_cast<InputSection *>())
       va = isec->getVA();
-    }
 
     uint64_t val = va + r.addend;
     if (r.pcrel)

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 1fe7697289ba..63748ee48324 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -47,6 +47,8 @@ class Symbol {
 
   uint64_t getFileOffset() const;
 
+  uint32_t gotIndex = UINT32_MAX;
+
 protected:
   Symbol(Kind k, StringRefZ name) : symbolKind(k), name(name) {}
 
@@ -80,7 +82,6 @@ class DylibSymbol : public Symbol {
   static bool classof(const Symbol *s) { return s->kind() == DylibKind; }
 
   DylibFile *file;
-  uint32_t gotIndex = UINT32_MAX;
   uint32_t stubsIndex = UINT32_MAX;
   uint32_t lazyBindOffset = UINT32_MAX;
 };

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 16fdbb5cc008..7237aa65331e 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -80,12 +80,18 @@ GotSection::GotSection()
   // table, which we do not currently emit
 }
 
-void GotSection::addEntry(DylibSymbol &sym) {
+void GotSection::addEntry(Symbol &sym) {
   if (entries.insert(&sym)) {
     sym.gotIndex = entries.size() - 1;
   }
 }
 
+void GotSection::writeTo(uint8_t *buf) const {
+  for (size_t i = 0, n = entries.size(); i < n; ++i)
+    if (auto *defined = dyn_cast<Defined>(entries[i]))
+      write64le(&buf[i * WordSize], defined->getVA());
+}
+
 BindingSection::BindingSection()
     : SyntheticSection(segment_names::linkEdit, section_names::binding) {}
 
@@ -112,20 +118,32 @@ void BindingSection::finalizeContents() {
   os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
                              in.got->parent->index);
   encodeULEB128(in.got->getSegmentOffset(), os);
-  for (const DylibSymbol *sym : in.got->getEntries()) {
-    // TODO: Implement compact encoding -- we only need to encode the
-    // 
diff erences between consecutive symbol entries.
-    if (sym->file->ordinal <= BIND_IMMEDIATE_MASK) {
-      os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
-                                 sym->file->ordinal);
+  uint32_t entries_to_skip = 0;
+  for (const Symbol *sym : in.got->getEntries()) {
+    if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
+      if (entries_to_skip != 0) {
+        os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
+        encodeULEB128(WordSize * entries_to_skip, os);
+        entries_to_skip = 0;
+      }
+
+      // TODO: Implement compact encoding -- we only need to encode the
+      // 
diff erences between consecutive symbol entries.
+      if (dysym->file->ordinal <= BIND_IMMEDIATE_MASK) {
+        os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
+                                   dysym->file->ordinal);
+      } else {
+        error("TODO: Support larger dylib symbol ordinals");
+        continue;
+      }
+      os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
+         << dysym->getName() << '\0'
+         << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
+         << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
     } else {
-      error("TODO: Support larger dylib symbol ordinals");
-      continue;
+      // We have a defined symbol with a pre-populated address; skip over it.
+      ++entries_to_skip;
     }
-    os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
-       << sym->getName() << '\0'
-       << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER)
-       << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
   }
 
   os << static_cast<uint8_t>(BIND_OPCODE_DONE);

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index c95f1556e918..95452538c445 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -78,23 +78,18 @@ class GotSection : public SyntheticSection {
 public:
   GotSection();
 
-  const llvm::SetVector<const DylibSymbol *> &getEntries() const {
-    return entries;
-  }
+  const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
 
   bool isNeeded() const override { return !entries.empty(); }
 
   uint64_t getSize() const override { return entries.size() * WordSize; }
 
-  void writeTo(uint8_t *buf) const override {
-    // Nothing to write, GOT contains all zeros at link time; it's populated at
-    // runtime by dyld.
-  }
+  void writeTo(uint8_t *buf) const override;
 
-  void addEntry(DylibSymbol &sym);
+  void addEntry(Symbol &sym);
 
 private:
-  llvm::SetVector<const DylibSymbol *> entries;
+  llvm::SetVector<const Symbol *> entries;
 };
 
 // Stores bind opcodes for telling dyld which symbols to load non-lazily.

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 74df6a951a90..7687fcdc66ea 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -18,6 +18,7 @@
 namespace lld {
 namespace macho {
 
+class Symbol;
 class DylibSymbol;
 class InputSection;
 struct Reloc;
@@ -48,13 +49,12 @@ class TargetInfo {
   virtual void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                                     uint64_t entryAddr) const = 0;
 
-  // Dylib symbols are referenced via either the GOT or the stubs section,
-  // depending on the relocation type. prepareDylibSymbolRelocation() will set
-  // up the GOT/stubs entries, and getDylibSymbolVA() will return the addresses
-  // of those entries.
-  virtual void prepareDylibSymbolRelocation(DylibSymbol &, uint8_t type) = 0;
-  virtual uint64_t getDylibSymbolVA(const DylibSymbol &,
-                                    uint8_t type) const = 0;
+  // Symbols may be referenced via either the GOT or the stubs section,
+  // depending on the relocation type. prepareSymbolRelocation() will set up the
+  // GOT/stubs entries, and getSymbolVA() will return the addresses of those
+  // entries.
+  virtual void prepareSymbolRelocation(Symbol &, uint8_t type) = 0;
+  virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
 
   uint32_t cpuType;
   uint32_t cpuSubtype;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 96ede13a2b64..def82b578774 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -255,8 +255,8 @@ void Writer::scanRelocations() {
         if (isa<Undefined>(s))
           error("undefined symbol " + s->getName() + ", referenced from " +
                 sys::path::filename(isec->file->getName()));
-        else if (auto *dylibSymbol = dyn_cast<DylibSymbol>(s))
-          target->prepareDylibSymbolRelocation(*dylibSymbol, r.type);
+        else
+          target->prepareSymbolRelocation(*s, r.type);
       }
     }
   }

diff  --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s
new file mode 100644
index 000000000000..6099a6bf18c7
--- /dev/null
+++ b/lld/test/MachO/local-got.s
@@ -0,0 +1,58 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %p/Inputs/libhello.s \
+# RUN:   -o %t/libhello.o
+# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -dylib -install_name \
+# RUN:   @executable_path/libhello.dylib %t/libhello.o -o %t/libhello.dylib
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o -L%t -lhello
+# RUN: llvm-objdump --full-contents --bind %t/test | FileCheck %s --match-full-lines
+
+## Check that the GOT references the cstrings. --full-contents displays the
+## address offset and the contents at that address very similarly, so am using
+## --match-full-lines to make sure we match on the right thing.
+# CHECK:      Contents of section __cstring:
+# CHECK-NEXT: 1000003cc {{.*}}
+
+## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
+## start of __cstring
+# CHECK:      Contents of section __got:
+# CHECK-NEXT: [[#%X,ADDR:]]  da030000 01000000 cc030000 01000000 {{.*}}
+# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
+
+## Check that a non-locally-defined symbol is still bound at the correct offset:
+# CHECK: Bind table:
+# CHECK-NEXT: segment      section  address         type     addend  dylib     symbol
+# CHECK-NEXT: __DATA_CONST __got    0x[[#ADDR+16]]  pointer  0       libhello  _hello_its_me
+
+.globl _main
+
+.text
+_main:
+  movl $0x2000004, %eax # write() syscall
+  mov $1, %rdi # stdout
+  movq _hello_its_me at GOTPCREL(%rip), %rsi
+  mov $15, %rdx # length of str
+  syscall
+
+  movl $0x2000004, %eax # write() syscall
+  mov $1, %rdi # stdout
+  movq _hello_world at GOTPCREL(%rip), %rsi
+  mov $13, %rdx # length of str
+  syscall
+
+  movl $0x2000004, %eax # write() syscall
+  mov $1, %rdi # stdout
+  movq _goodbye_world at GOTPCREL(%rip), %rsi
+  mov $15, %rdx # length of str
+  syscall
+
+  mov $0, %rax
+  ret
+
+.section __TEXT,__cstring
+_hello_world:
+  .asciz "Hello world!\n"
+
+_goodbye_world:
+  .asciz "Goodbye world!\n"


        


More information about the llvm-commits mailing list