[lld] ca85e37 - [lld-macho] Support static linking of thread-locals

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 7 11:05:13 PDT 2020


Author: Jez Ng
Date: 2020-08-07T11:04:52-07:00
New Revision: ca85e3733816ce444b86dd4598c1a98a54464b10

URL: https://github.com/llvm/llvm-project/commit/ca85e3733816ce444b86dd4598c1a98a54464b10
DIFF: https://github.com/llvm/llvm-project/commit/ca85e3733816ce444b86dd4598c1a98a54464b10.diff

LOG: [lld-macho] Support static linking of thread-locals

Note: What ELF refers to as "TLS", Mach-O seems to refer to as "TLV", i.e.
thread-local variables.

This diff implements support for TLV relocations that reference defined
symbols. On x86_64, TLV relocations are always used with movq opcodes, so for
defined TLVs, we don't need to create a synthetic section to store the
addresses of the symbols -- we can just convert the `movq` to a `leaq`.

One notable quirk of Mach-O's TLVs is that absolute-address relocations
inside TLV-defining sections behave differently -- their addresses are
no longer absolute, but relative to the start of the target section.
(AFAICT, RIP-relative relocations are not allowed in these sections.)

Reviewed By: #lld-macho, compnerd, smeenai

Differential Revision: https://reviews.llvm.org/D85080

Added: 
    lld/test/MachO/invalid/bad-tlv-def.s
    lld/test/MachO/invalid/bad-tlv-opcode.s
    lld/test/MachO/tlv.s

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Target.h
    lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 458dad805b4a..c43ed5a17c9a 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -36,7 +36,8 @@ struct X86_64 : TargetInfo {
 
   void prepareSymbolRelocation(lld::macho::Symbol &, const InputSection *,
                                const Reloc &) override;
-  uint64_t getSymbolVA(const lld::macho::Symbol &, uint8_t type) const override;
+  uint64_t resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &,
+                           uint8_t type) const override;
 };
 
 } // namespace
@@ -72,6 +73,11 @@ uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
                                    const relocation_info &rel) const {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
+
+  if (isThreadLocalVariables(sec.flags) && rel.r_type != X86_64_RELOC_UNSIGNED)
+    error("relocations in thread-local variable sections must be "
+          "X86_64_RELOC_UNSIGNED");
+
   switch (rel.r_type) {
   case X86_64_RELOC_BRANCH:
     // XXX: ld64 also supports r_length = 0 here but I'm not sure when such a
@@ -84,6 +90,7 @@ uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
   case X86_64_RELOC_SIGNED_4:
   case X86_64_RELOC_GOT_LOAD:
   case X86_64_RELOC_GOT:
+  case X86_64_RELOC_TLV:
     if (!rel.r_pcrel)
       fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " +
             std::to_string(rel.r_type) + " must be pcrel");
@@ -123,6 +130,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const {
   case X86_64_RELOC_SIGNED_4:
   case X86_64_RELOC_GOT_LOAD:
   case X86_64_RELOC_GOT:
+  case X86_64_RELOC_TLV:
     // These types are only used for pc-relative relocations, so offset by 4
     // since the RIP has advanced by 4 at this point. This is only valid when
     // r_length = 2, which is enforced by validateLength().
@@ -239,8 +247,13 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
   case X86_64_RELOC_SIGNED_2:
   case X86_64_RELOC_SIGNED_4:
     break;
-  case X86_64_RELOC_SUBTRACTOR:
   case X86_64_RELOC_TLV:
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+      error("relocations to thread-local dylib symbols not yet implemented");
+    else
+      assert(isa<Defined>(&sym));
+    break;
+  case X86_64_RELOC_SUBTRACTOR:
     fatal("TODO: handle relocation type " + std::to_string(r.type));
     break;
   default:
@@ -248,8 +261,8 @@ void X86_64::prepareSymbolRelocation(lld::macho::Symbol &sym,
   }
 }
 
-uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym,
-                             uint8_t type) const {
+uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym,
+                                 uint8_t type) const {
   switch (type) {
   case X86_64_RELOC_GOT_LOAD:
   case X86_64_RELOC_GOT:
@@ -264,8 +277,18 @@ uint64_t X86_64::getSymbolVA(const lld::macho::Symbol &sym,
   case X86_64_RELOC_SIGNED_2:
   case X86_64_RELOC_SIGNED_4:
     return sym.getVA();
+  case X86_64_RELOC_TLV: {
+    if (auto *dysym = dyn_cast<DylibSymbol>(&sym))
+      error("relocations to thread-local dylib symbols not yet implemented");
+
+    // Convert the movq to a leaq.
+    assert(isa<Defined>(&sym));
+    if (buf[-2] != 0x8b)
+      error("X86_64_RELOC_TLV must be used with movq instructions");
+    buf[-2] = 0x8d;
+    return sym.getVA();
+  }
   case X86_64_RELOC_SUBTRACTOR:
-  case X86_64_RELOC_TLV:
     fatal("TODO: handle relocation type " + std::to_string(type));
   default:
     llvm_unreachable("Unexpected relocation type");

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 72d489283051..a7fcf49334b2 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -35,10 +35,19 @@ void InputSection::writeTo(uint8_t *buf) {
 
   for (Reloc &r : relocs) {
     uint64_t va = 0;
-    if (auto *s = r.target.dyn_cast<Symbol *>())
-      va = target->getSymbolVA(*s, r.type);
-    else if (auto *isec = r.target.dyn_cast<InputSection *>())
+    if (auto *s = r.target.dyn_cast<Symbol *>()) {
+      va = target->resolveSymbolVA(buf + r.offset, *s, r.type);
+
+      if (isThreadLocalVariables(flags)) {
+        // References from thread-local variable sections are treated as
+        // offsets relative to the start of the target section, instead of as
+        // absolute addresses.
+        if (auto *defined = dyn_cast<Defined>(s))
+          va -= defined->isec->parent->addr;
+      }
+    } else if (auto *isec = r.target.dyn_cast<InputSection *>()) {
       va = isec->getVA();
+    }
 
     uint64_t val = va + r.addend;
     if (r.pcrel)

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index ab2ac63ac82a..93517298f6c7 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -38,6 +38,10 @@ struct Reloc {
 inline bool isZeroFill(uint8_t flags) {
   return llvm::MachO::isVirtualSection(flags & llvm::MachO::SECTION_TYPE);
 }
+
+inline bool isThreadLocalVariables(uint8_t flags) {
+  return (flags & llvm::MachO::SECTION_TYPE) ==
+         llvm::MachO::S_THREAD_LOCAL_VARIABLES;
 }
 
 class InputSection {

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index ac772da0129f..5829319f5c2e 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -59,9 +59,19 @@ void MachHeaderSection::writeTo(uint8_t *buf) const {
   hdr->ncmds = loadCommands.size();
   hdr->sizeofcmds = sizeOfCmds;
   hdr->flags = MachO::MH_NOUNDEFS | MachO::MH_DYLDLINK | MachO::MH_TWOLEVEL;
+
   if (config->outputType == MachO::MH_DYLIB && !config->hasReexports)
     hdr->flags |= MachO::MH_NO_REEXPORTED_DYLIBS;
 
+  for (OutputSegment *seg : outputSegments) {
+    for (OutputSection *osec : seg->getSections()) {
+      if (isThreadLocalVariables(osec->flags)) {
+        hdr->flags |= MachO::MH_HAS_TLV_DESCRIPTORS;
+        break;
+      }
+    }
+  }
+
   uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
   for (LoadCommand *lc : loadCommands) {
     lc->writeTo(p);

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 8ea1bde12307..cbee6afa6b61 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -51,11 +51,13 @@ class TargetInfo {
 
   // Symbols may be referenced via either the GOT or the stubs section,
   // depending on the relocation type. prepareSymbolRelocation() will set up the
-  // GOT/stubs entries, and getSymbolVA() will return the addresses of those
-  // entries.
+  // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
+  // entries. resolveSymbolVA() may also relax the target instructions to save
+  // on a level of address indirection.
   virtual void prepareSymbolRelocation(Symbol &, const InputSection *,
                                        const Reloc &) = 0;
-  virtual uint64_t getSymbolVA(const Symbol &, uint8_t type) const = 0;
+  virtual uint64_t resolveSymbolVA(uint8_t *buf, const Symbol &,
+                                   uint8_t type) const = 0;
 
   uint32_t cpuType;
   uint32_t cpuSubtype;

diff  --git a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
index fddd192630d1..db13fe21dbbe 100644
--- a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
+++ b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
@@ -18,7 +18,7 @@ current-version:  0001.001.1
 parent-umbrella:  System
 exports:
   - archs:        [ 'x86_64' ]
-    symbols:      [ dyld_stub_binder ]
+    symbols:      [ dyld_stub_binder, __tlv_bootstrap ]
 --- !tapi-tbd-v3
 archs:            [ x86_64 ]
 uuids:            [ 'x86_64: 00000000-0000-0000-0000-000000000002' ]

diff  --git a/lld/test/MachO/invalid/bad-tlv-def.s b/lld/test/MachO/invalid/bad-tlv-def.s
new file mode 100644
index 000000000000..8ba24858542d
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-tlv-def.s
@@ -0,0 +1,15 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s
+
+# CHECK: error: relocations in thread-local variable sections must be X86_64_RELOC_UNSIGNED
+
+.text
+.globl _main
+_main:
+  ret
+
+.section	__DATA,__thread_vars,thread_local_variables
+.globl	_foo, _bar
+_foo:
+  movq _bar@GOTPCREL(%rip), %rax

diff  --git a/lld/test/MachO/invalid/bad-tlv-opcode.s b/lld/test/MachO/invalid/bad-tlv-opcode.s
new file mode 100644
index 000000000000..4c0ad039899a
--- /dev/null
+++ b/lld/test/MachO/invalid/bad-tlv-opcode.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s
+
+# CHECK: error: X86_64_RELOC_TLV must be used with movq instructions
+
+.text
+.globl _main
+_main:
+  leaq _foo@TLVP(%rip), %rax
+  ret
+
+.section	__DATA,__thread_vars,thread_local_variables
+_foo:

diff  --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s
new file mode 100644
index 000000000000..d8e02eeabe2e
--- /dev/null
+++ b/lld/test/MachO/tlv.s
@@ -0,0 +1,57 @@
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: lld -flavor darwinnew -L%S/Inputs/MacOSX.sdk/usr/lib -lSystem -o %t/test %t/test.o
+# RUN: llvm-readobj --file-headers %t/test | FileCheck %s --check-prefix=HEADER
+# RUN: llvm-objdump -D %t/test | FileCheck %s
+
+# HEADER: MH_HAS_TLV_DESCRIPTORS
+
+# CHECK:       Disassembly of section __TEXT,__text:
+# CHECK-EMPTY:
+# CHECK-NEXT:  <_main>:
+# CHECK-NEXT:  leaq    {{.*}}(%rip), %rax  # {{.*}} <_foo>
+# CHECK-NEXT:  leaq    {{.*}}(%rip), %rax  # {{.*}} <_bar>
+# CHECK-NEXT:  retq
+# CHECK-EMPTY:
+# CHECK-NEXT:  Disassembly of section __DATA,__thread_data:
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__thread_data>:
+# CHECK-NEXT:  ef
+# CHECK-NEXT:  be ad de be ba
+# CHECK-NEXT:  fe ca
+# CHECK-EMPTY:
+# CHECK-NEXT:  Disassembly of section __DATA,__thread_vars:
+# CHECK-EMPTY:
+# CHECK-NEXT: <_foo>:
+# CHECK-NEXT:          ...
+# CHECK-EMPTY:
+# CHECK-NEXT:  <_bar>:
+# CHECK-NEXT:          ...
+# CHECK-NEXT:  04 00
+# CHECK-NEXT:  00 00
+# CHECK-NEXT:  00 00
+# CHECK-NEXT:  00 00
+
+.globl _main
+_main:
+  mov _foo@TLVP(%rip), %rax
+  mov _bar@TLVP(%rip), %rax
+  ret
+
+.section	__DATA,__thread_data,thread_local_regular
+_foo$tlv$init:
+  .long	0xdeadbeef
+_bar$tlv$init:
+  .long	0xcafebabe
+
+.section	__DATA,__thread_vars,thread_local_variables
+.globl	_foo, _bar
+_foo:
+  .quad	__tlv_bootstrap
+  .quad	0
+  .quad	_foo$tlv$init
+_bar:
+  .quad	__tlv_bootstrap
+  .quad	0
+  .quad	_bar$tlv$init


        


More information about the llvm-commits mailing list