[lld] 5e85173 - [lld-macho] Fix semantics & add tests for ARM64 GOT/TLV relocs

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 23 19:02:51 PST 2021


Author: Jez Ng
Date: 2021-02-23T22:02:38-05:00
New Revision: 5e851733c5b603bec962bb4c5cf9d5cc93d88175

URL: https://github.com/llvm/llvm-project/commit/5e851733c5b603bec962bb4c5cf9d5cc93d88175
DIFF: https://github.com/llvm/llvm-project/commit/5e851733c5b603bec962bb4c5cf9d5cc93d88175.diff

LOG: [lld-macho] Fix semantics & add tests for ARM64 GOT/TLV relocs

I've adjusted the RelocAttrBits to better fit the semantics of
the relocations. In particular:

1. *_UNSIGNED relocations are no longer marked with the `TLV` bit, even
   though they can occur within TLV sections. Instead the `TLV` bit is
   reserved for relocations that can reference thread-local symbols, and
   *_UNSIGNED relocations have their own `UNSIGNED` bit. The previous
   implementation caused TLV and regular UNSIGNED semantics to be
   conflated, resulting in rebase opcodes being incorrectly emitted for TLV
   relocations.

2. I've added a new `POINTER` bit to denote non-relaxable GOT
   relocations. This distinction isn't important on x86 -- the GOT
   relocations there are either relaxable or non-relaxable loads -- but
   arm64 has `GOT_LOAD_PAGE21` which loads the page that the referent
   symbol is in (regardless of whether the symbol ends up in the GOT). This
   relocation must reference a GOT symbol (so must have the `GOT` bit set)
   but isn't itself relaxable (so must not have the `LOAD` bit). The
   `POINTER` bit is used for relocations that *must* reference a GOT
   slot.

3. A similar situation occurs for TLV relocations.

4. ld64 supports both a pcrel and an absolute version of
   ARM64_RELOC_POINTER_TO_GOT. But the semantics of the absolute version
   are pretty weird -- it results in the value of the GOT slot being
   written, rather than the address. (That means a reference to a
   dynamically-bound slot will result in zeroes being written.) The
   programs I've tried linking don't use this form of the relocation, so
   I've dropped our partial support for it by removing the relevant
   RelocAttrBits.

Reviewed By: alexshap

Differential Revision: https://reviews.llvm.org/D97031

Added: 
    lld/test/MachO/arm64-reloc-got-load.s
    lld/test/MachO/arm64-reloc-pointer-to-got.s
    lld/test/MachO/arm64-reloc-tlv-load.s

Modified: 
    lld/MachO/Arch/ARM64.cpp
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/Target.cpp
    lld/MachO/Target.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index cf51e7543fab..c67c9d5b6156 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -48,13 +48,16 @@ struct ARM64 : TargetInfo {
 // Random notes on reloc types:
 // ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12
 // SUBTRACTOR always pairs with UNSIGNED (a delta between two sections)
-// POINTER_TO_GOT: 4-byte is pc-relative, 8-byte is absolute
+// POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte
+// absolute version of this relocation. The semantics of the absolute relocation
+// are weird -- it results in the value of the GOT slot being written, instead
+// of the address. Let's not support it unless we find a real-world use case.
 
 const TargetInfo::RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
   static const std::array<TargetInfo::RelocAttrs, 11> relocAttrsArray{{
 #define B(x) RelocAttrBits::x
-      {"UNSIGNED", B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(TLV) | B(DYSYM8) |
-                       B(BYTE4) | B(BYTE8)},
+      {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) |
+                       B(DYSYM8) | B(BYTE4) | B(BYTE8)},
       {"SUBTRACTOR", B(SUBTRAHEND) | B(BYTE8)},
       {"BRANCH26", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
       {"PAGE21", B(PCREL) | B(EXTERN) | B(BYTE4)},
@@ -62,7 +65,7 @@ const TargetInfo::RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
       {"GOT_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
       {"GOT_LOAD_PAGEOFF12",
        B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
-      {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4) | B(BYTE8)},
+      {"POINTER_TO_GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
       {"TLVP_LOAD_PAGE21", B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)},
       {"TLVP_LOAD_PAGEOFF12",
        B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},

diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index ad7b26af78b0..c7bb49392c37 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -45,12 +45,12 @@ struct X86_64 : TargetInfo {
 const TargetInfo::RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const {
   static const std::array<TargetInfo::RelocAttrs, 10> relocAttrsArray{{
 #define B(x) RelocAttrBits::x
-      {"UNSIGNED", B(TLV) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(DYSYM8) |
-                       B(BYTE4) | B(BYTE8)},
+      {"UNSIGNED", B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) |
+                       B(DYSYM8) | B(BYTE4) | B(BYTE8)},
       {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
       {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
       {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
-      {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
+      {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)},
       {"SUBTRACTOR", B(SUBTRAHEND)},
       {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
       {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index e3c8f180f190..c2026a8787e5 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -221,7 +221,7 @@ static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
     error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
                   "be PC-relative"));
   if (isThreadLocalVariables(sec.flags) &&
-      !relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::BYTE8))
+      !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED))
     error(message("not allowed in thread-local section, must be UNSIGNED"));
   if (rel.r_length < 2 || rel.r_length > 3 ||
       !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index a1e556d4e99f..971ca6436fbe 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -40,12 +40,10 @@ static uint64_t resolveSymbolVA(uint8_t *loc, const lld::macho::Symbol &sym,
   if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
     if (sym.isInStubs())
       return in.stubs->addr + sym.stubsIndex * target->stubSize;
-  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) {
+  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
     if (sym.isInGot())
       return in.got->addr + sym.gotIndex * WordSize;
-  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
-    return in.got->addr + sym.gotIndex * WordSize;
-  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) {
+  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
     if (sym.isInGot())
       return in.tlvPointers->addr + sym.gotIndex * WordSize;
     assert(isa<Defined>(&sym));

diff  --git a/lld/MachO/Target.cpp b/lld/MachO/Target.cpp
index f3387c913d00..a37d1f76ae31 100644
--- a/lld/MachO/Target.cpp
+++ b/lld/MachO/Target.cpp
@@ -32,8 +32,7 @@ bool TargetInfo::validateSymbolRelocation(const Symbol *sym,
         .str();
   };
 
-  if ((relocAttrs.hasAttr(RelocAttrBits::TLV) &&
-       !relocAttrs.hasAttr(RelocAttrBits::BYTE8)) != sym->isTlv())
+  if (relocAttrs.hasAttr(RelocAttrBits::TLV) != sym->isTlv())
     error(message(Twine("requires that variable ") +
                   (sym->isTlv() ? "not " : "") + "be thread-local"));
   if (relocAttrs.hasAttr(RelocAttrBits::DYSYM8) && isa<DylibSymbol>(sym) &&

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index f09a576bc73a..f07a69eef91a 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -44,11 +44,13 @@ enum class RelocAttrBits {
   ADDEND = 1 << 6,     // *_ADDEND paired prefix reloc
   SUBTRAHEND = 1 << 7, // *_SUBTRACTOR paired prefix reloc
   BRANCH = 1 << 8,     // Value is branch target
-  GOT = 1 << 9,        // Pertains to Global Offset Table slots
-  TLV = 1 << 10,       // Pertains to Thread-Local Variable slots
+  GOT = 1 << 9,        // References a symbol in the Global Offset Table
+  TLV = 1 << 10,       // References a thread-local symbol
   DYSYM8 = 1 << 11,    // Requires DySym width to be 8 bytes
   LOAD = 1 << 12,      // Relaxable indirect load
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ (1 << 13) - 1),
+  POINTER = 1 << 13,   // Non-relaxable indirect load (pointer is taken)
+  UNSIGNED = 1 << 14,  // *_UNSIGNED relocs
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ (1 << 15) - 1),
 };
 
 class TargetInfo {

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index b1c7d8c71009..f4cd5cc6a578 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -408,15 +408,13 @@ static void prepareSymbolRelocation(lld::macho::Symbol *sym,
 
   if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
     prepareBranchTarget(sym);
-  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) {
-    if (needsBinding(sym))
-      in.got->addEntry(sym);
   } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
-    in.got->addEntry(sym);
-  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) {
+    if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
+      in.got->addEntry(sym);
+  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
     if (needsBinding(sym))
       in.tlvPointers->addEntry(sym);
-  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
+  } else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) {
     // References from thread-local variable sections are treated as offsets
     // relative to the start of the referent section, and therefore have no
     // need of rebase opcodes.

diff  --git a/lld/test/MachO/arm64-reloc-got-load.s b/lld/test/MachO/arm64-reloc-got-load.s
new file mode 100644
index 000000000000..fb5256545456
--- /dev/null
+++ b/lld/test/MachO/arm64-reloc-got-load.s
@@ -0,0 +1,50 @@
+# REQUIRES: arm
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/main.s -o %t/main.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foobar.s -o %t/foobar.o
+
+# RUN: %lld -lSystem -arch arm64 -o %t/static %t/main.o %t/foobar.o
+# RUN: llvm-objdump --macho -d --no-show-raw-insn --syms %t/static | FileCheck %s --check-prefix=STATIC
+
+# RUN: %lld -lSystem -arch arm64 -dylib -o %t/libfoo.dylib %t/foobar.o
+# RUN: %lld -lSystem -arch arm64 -o %t/main %t/main.o %t/libfoo.dylib
+# RUN: llvm-objdump --macho -d --no-show-raw-insn --section-headers %t/main | FileCheck %s --check-prefix=DYLIB
+
+# STATIC-LABEL: _main:
+# STATIC-NEXT:  adrp	x8, [[#]] ; 0x[[#%x,PAGE:]]
+# STATIC-NEXT:  add	x8, x8, #[[#%u,FOO_OFF:]]
+# STATIC-NEXT:  adrp	x8, [[#]] ; 0x[[#PAGE]]
+# STATIC-NEXT:  add	x8, x8, #[[#%u,BAR_OFF:]]
+# STATIC-NEXT:  ret
+
+# STATIC-LABEL: SYMBOL TABLE:
+# STATIC-DAG:   {{0*}}[[#%x,PAGE+FOO_OFF]] g     F __TEXT,__text _foo
+# STATIC-DAG:   {{0*}}[[#%x,PAGE+BAR_OFF]] g     F __TEXT,__text _bar
+
+# DYLIB-LABEL: _main:
+# DYLIB-NEXT:  adrp	x8, [[#]] ; 0x[[#%x,GOT:]]
+# DYLIB-NEXT:  ldr	x8, [x8, #8] ; literal pool symbol address: _foo
+# DYLIB-NEXT:  adrp	x8, [[#]] ; 0x[[#GOT]]
+# DYLIB-NEXT:  ldr	x8, [x8] ; literal pool symbol address: _bar
+# DYLIB-NEXT:  ret
+# DYLIB-NEXT:  Sections:
+# DYLIB-NEXT:  Idx   Name          Size     VMA              Type
+# DYLIB:       [[#]] __got         00000010 {{0*}}[[#GOT]]   DATA
+
+#--- main.s
+.globl _main, _foo, _bar
+.p2align 2
+_main:
+	adrp x8, _foo at GOTPAGE
+	ldr	 x8, [x8, _foo at GOTPAGEOFF]
+	adrp x8, _bar at GOTPAGE
+	ldr	 x8, [x8, _bar at GOTPAGEOFF]
+  ret
+
+#--- foobar.s
+.globl _foo, _bar
+_foo:
+  .space 0
+_bar:
+  .space 0

diff  --git a/lld/test/MachO/arm64-reloc-pointer-to-got.s b/lld/test/MachO/arm64-reloc-pointer-to-got.s
new file mode 100644
index 000000000000..5bf98a29b7a7
--- /dev/null
+++ b/lld/test/MachO/arm64-reloc-pointer-to-got.s
@@ -0,0 +1,36 @@
+# REQUIRES: arm
+
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: %lld -lSystem -arch arm64 -o %t %t.o
+# RUN: llvm-objdump --macho -d --full-contents --section-headers %t | FileCheck %s
+
+## FIXME: Even though we have reserved a GOT slot for _foo due to
+## POINTER_TO_GOT, we should still be able to relax this GOT_LOAD reference to
+## it.
+# CHECK:      _main:
+# CHECK-NEXT:	adrp	x8, [[#]] ;
+# CHECK-NEXT:	ldr	x8, [x8] ; literal pool symbol address: _foo
+# CHECK-NEXT:	ret
+
+# CHECK: Idx   Name          Size     VMA              Type
+# CHECK: [[#]] __got         00000008 0000000100004000 DATA
+# CHECK: [[#]] __data        00000004 0000000100008000 DATA
+
+## The relocated data should contain the 
diff erence between the addresses of
+## __data and __got in little-endian form.
+# CHECK:       Contents of section __DATA,__data:
+# CHECK-NEXT:  100008000 00c0ffff
+
+.globl _main, _foo
+.p2align 2
+_main:
+  adrp x8, _foo at GOTPAGE
+  ldr  x8, [x8, _foo at GOTPAGEOFF]
+  ret
+
+.p2align 2
+_foo:
+  ret
+
+.data
+.long _foo at GOT - .

diff  --git a/lld/test/MachO/arm64-reloc-tlv-load.s b/lld/test/MachO/arm64-reloc-tlv-load.s
new file mode 100644
index 000000000000..1b911a687fea
--- /dev/null
+++ b/lld/test/MachO/arm64-reloc-tlv-load.s
@@ -0,0 +1,62 @@
+# REQUIRES: arm
+
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/main.s -o %t/main.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foobar.s -o %t/foobar.o
+
+# RUN: %lld -lSystem -arch arm64 -o %t/static %t/main.o %t/foobar.o
+# RUN: llvm-objdump --macho -d --no-show-raw-insn --syms %t/static | FileCheck %s --check-prefix=STATIC
+
+# RUN: %lld -lSystem -arch arm64 -dylib -o %t/libfoo.dylib %t/foobar.o
+# RUN: %lld -lSystem -arch arm64 -o %t/main %t/main.o %t/libfoo.dylib
+# RUN: llvm-objdump --macho -d --no-show-raw-insn --section-headers %t/main | FileCheck %s --check-prefix=DYLIB
+
+# STATIC-LABEL: _main:
+# STATIC-NEXT:  adrp x8, [[#]] ; 0x[[#%x,PAGE:]]
+# STATIC-NEXT:  add  x8, x8, #[[#%u,FOO_OFF:]]
+# STATIC-NEXT:  adrp x8, [[#]] ; 0x[[#PAGE]]
+# STATIC-NEXT:  add  x8, x8, #[[#%u,BAR_OFF:]]
+# STATIC-NEXT:  ret
+
+# STATIC-LABEL: SYMBOL TABLE:
+# STATIC-DAG:   {{0*}}[[#%x,PAGE+FOO_OFF]] g     O __DATA,__thread_vars _foo
+# STATIC-DAG:   {{0*}}[[#%x,PAGE+BAR_OFF]] g     O __DATA,__thread_vars _bar
+
+# DYLIB-LABEL: _main:
+# DYLIB-NEXT:  adrp x8, [[#]] ; 0x[[#%x,TLV:]]
+# DYLIB-NEXT:  ldr  x8, [x8, #8] ; literal pool symbol address: _foo
+# DYLIB-NEXT:  adrp x8, [[#]] ; 0x[[#TLV]]
+# DYLIB-NEXT:  ldr  x8, [x8] ; literal pool symbol address: _bar
+# DYLIB-NEXT:  ret
+# DYLIB-NEXT:  Sections:
+# DYLIB-NEXT:  Idx   Name          Size     VMA              Type
+# DYLIB:       [[#]] __thread_ptrs 00000010 {{0*}}[[#TLV]]   DATA
+
+#--- main.s
+.globl _main, _foo, _bar
+.p2align 2
+_main:
+  adrp x8, _foo at TLVPPAGE
+  ldr  x8, [x8, _foo at TLVPPAGEOFF]
+  adrp x8, _bar at TLVPPAGE
+  ldr  x8, [x8, _bar at TLVPPAGEOFF]
+  ret
+
+#--- foobar.s
+.globl _foo, _bar
+
+.section  __DATA,__thread_data,thread_local_regular
+_foo$tlv$init:
+  .long 123
+_bar$tlv$init:
+  .long 123
+
+.section  __DATA,__thread_vars,thread_local_variables
+_foo:
+  .quad __tlv_bootstrap
+  .quad 0
+  .quad _foo$tlv$init
+_bar:
+  .quad __tlv_bootstrap
+  .quad 0
+  .quad _bar$tlv$init


        


More information about the llvm-commits mailing list