[llvm] 3a9d2f1 - [lld-macho][NFC] refactor relocation handling

Greg McGary via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 09:55:33 PST 2021


Author: Greg McGary
Date: 2021-02-02T10:54:53-07:00
New Revision: 3a9d2f1488f0a767d4e55dc701bbdfc5438bbb3e

URL: https://github.com/llvm/llvm-project/commit/3a9d2f1488f0a767d4e55dc701bbdfc5438bbb3e
DIFF: https://github.com/llvm/llvm-project/commit/3a9d2f1488f0a767d4e55dc701bbdfc5438bbb3e.diff

LOG: [lld-macho][NFC] refactor relocation handling

Add per-reloc-type attribute bits and migrate code from per-target files into target-independent code, driven by reloc attributes.

Many cleanups

Differential Revision: https://reviews.llvm.org/D95121

Added: 
    lld/test/MachO/x86-64-reloc-subtract.s

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/Target.cpp
    lld/MachO/Target.h
    lld/MachO/Writer.cpp
    lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s
    lld/test/MachO/invalid/bad-got-to-tlv-reference.s
    lld/test/MachO/invalid/bad-tlv-def.s
    lld/test/MachO/invalid/bad-tlv-opcode.s
    lld/test/MachO/invalid/bad-tlv-relocation.s
    lld/test/MachO/invalid/invalid-relocation-length.yaml
    lld/test/MachO/invalid/invalid-relocation-pcrel.yaml
    llvm/include/llvm/BinaryFormat/MachO.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 729ef603adb7..b7579dc6b420 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -25,98 +25,51 @@ namespace {
 struct X86_64 : TargetInfo {
   X86_64();
 
-  bool isPairedReloc(relocation_info) const override;
-  uint64_t getAddend(MemoryBufferRef, const section_64 &, relocation_info,
-                     relocation_info) const override;
-  void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override;
+  uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
+                             const relocation_info) const override;
+  void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
+                   uint64_t pc) const override;
 
   void writeStub(uint8_t *buf, const macho::Symbol &) const override;
   void writeStubHelperHeader(uint8_t *buf) const override;
   void writeStubHelperEntry(uint8_t *buf, const DylibSymbol &,
                             uint64_t entryAddr) const override;
 
-  void prepareSymbolRelocation(lld::macho::Symbol *, const InputSection *,
-                               const Reloc &) override;
-  uint64_t resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &,
-                           uint8_t type) const override;
+  void relaxGotLoad(uint8_t *loc, uint8_t type) const override;
+  const TargetInfo::RelocAttrs &getRelocAttrs(uint8_t type) const override;
 };
 
 } // namespace
 
-static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec,
-                                    relocation_info rel) {
-  return ("invalid relocation at offset " + std::to_string(rel.r_address) +
-          " of " + sec.segname + "," + sec.sectname + " in " +
-          mb.getBufferIdentifier())
-      .str();
+const TargetInfo::RelocAttrs &X86_64::getRelocAttrs(uint8_t type) const {
+  static const std::array<TargetInfo::RelocAttrs, 10> relocAttrsArray{{
+#define B(x) RelocAttrBits::x
+      {"UNSIGNED", B(TLV) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(DYSYM8) |
+                       B(BYTE4) | B(BYTE8)},
+      {"SIGNED", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
+      {"BRANCH", B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)},
+      {"GOT_LOAD", B(PCREL) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)},
+      {"GOT", B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)},
+      {"SUBTRACTOR", B(SUBTRAHEND)},
+      {"SIGNED_1", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
+      {"SIGNED_2", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
+      {"SIGNED_4", B(PCREL) | B(EXTERN) | B(LOCAL) | B(BYTE4)},
+      {"TLV", B(PCREL) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)},
+#undef B
+  }};
+  assert(type >= 0 && type < relocAttrsArray.size() &&
+         "invalid relocation type");
+  if (type < 0 || type >= relocAttrsArray.size())
+    return TargetInfo::invalidRelocAttrs;
+  return relocAttrsArray[type];
 }
 
-static void validateLength(MemoryBufferRef mb, const section_64 &sec,
-                           relocation_info rel,
-                           ArrayRef<uint8_t> validLengths) {
-  if (find(validLengths, rel.r_length) != validLengths.end())
-    return;
-
-  std::string msg = getErrorLocation(mb, sec, rel) + ": relocations of type " +
-                    std::to_string(rel.r_type) + " must have r_length of ";
-  bool first = true;
-  for (uint8_t length : validLengths) {
-    if (!first)
-      msg += " or ";
-    first = false;
-    msg += std::to_string(length);
-  }
-  fatal(msg);
-}
-
-bool X86_64::isPairedReloc(relocation_info rel) const {
-  return rel.r_type == X86_64_RELOC_SUBTRACTOR;
-}
-
-uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec,
-                           relocation_info rel,
-                           relocation_info pairedRel) const {
+uint64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
+                                   relocation_info rel) const {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
 
-  if (isThreadLocalVariables(sec.flags) && rel.r_type != X86_64_RELOC_UNSIGNED)
-    error("relocations in thread-local variable sections must be "
-          "X86_64_RELOC_UNSIGNED");
-
-  switch (rel.r_type) {
-  case X86_64_RELOC_BRANCH:
-    // XXX: ld64 also supports r_length = 0 here but I'm not sure when such a
-    // relocation will actually be generated.
-    validateLength(mb, sec, rel, {2});
-    break;
-  case X86_64_RELOC_SIGNED:
-  case X86_64_RELOC_SIGNED_1:
-  case X86_64_RELOC_SIGNED_2:
-  case X86_64_RELOC_SIGNED_4:
-  case X86_64_RELOC_GOT_LOAD:
-  case X86_64_RELOC_GOT:
-  case X86_64_RELOC_TLV:
-    if (!rel.r_pcrel)
-      fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " +
-            std::to_string(rel.r_type) + " must be pcrel");
-    validateLength(mb, sec, rel, {2});
-    break;
-  case X86_64_RELOC_UNSIGNED:
-    if (rel.r_pcrel)
-      fatal(getErrorLocation(mb, sec, rel) + ": relocations of type " +
-            std::to_string(rel.r_type) + " must not be pcrel");
-    validateLength(mb, sec, rel, {2, 3});
-    break;
-  default:
-    error("TODO: Unhandled relocation type " + std::to_string(rel.r_type));
-    return 0;
-  }
-
   switch (rel.r_length) {
-  case 0:
-    return *loc;
-  case 1:
-    return read16le(loc);
   case 2:
     return read32le(loc);
   case 3:
@@ -126,40 +79,17 @@ uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec,
   }
 }
 
-void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const {
-  switch (r.type) {
-  case X86_64_RELOC_BRANCH:
-  case X86_64_RELOC_SIGNED:
-  case X86_64_RELOC_SIGNED_1:
-  case X86_64_RELOC_SIGNED_2:
-  case X86_64_RELOC_SIGNED_4:
-  case X86_64_RELOC_GOT_LOAD:
-  case X86_64_RELOC_GOT:
-  case X86_64_RELOC_TLV:
-    // These types are only used for pc-relative relocations, so offset by 4
-    // since the RIP has advanced by 4 at this point. This is only valid when
-    // r_length = 2, which is enforced by validateLength().
-    val -= 4;
-    break;
-  case X86_64_RELOC_UNSIGNED:
-    break;
-  default:
-    llvm_unreachable(
-        "getAddend should have flagged all unhandled relocation types");
-  }
-
+void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
+                         uint64_t pc) const {
+  value += r.addend;
+  if (r.pcrel)
+    value -= (pc + 4);
   switch (r.length) {
-  case 0:
-    *loc = val;
-    break;
-  case 1:
-    write16le(loc, val);
-    break;
   case 2:
-    write32le(loc, val);
+    write32le(loc, value);
     break;
   case 3:
-    write64le(loc, val);
+    write64le(loc, value);
     break;
   default:
     llvm_unreachable("invalid r_length");
@@ -201,11 +131,6 @@ static constexpr uint8_t stubHelperHeader[] = {
     0x90,                         // 0xf: nop
 };
 
-static constexpr uint8_t stubHelperEntry[] = {
-    0x68, 0, 0, 0, 0, // 0x0: pushq <bind offset>
-    0xe9, 0, 0, 0, 0, // 0x5: jmp <__stub_helper>
-};
-
 void X86_64::writeStubHelperHeader(uint8_t *buf) const {
   memcpy(buf, stubHelperHeader, sizeof(stubHelperHeader));
   writeRipRelative(buf, in.stubHelper->addr, 7, in.imageLoaderCache->getVA());
@@ -214,6 +139,11 @@ void X86_64::writeStubHelperHeader(uint8_t *buf) const {
                        in.stubHelper->stubBinder->gotIndex * WordSize);
 }
 
+static constexpr uint8_t stubHelperEntry[] = {
+    0x68, 0, 0, 0, 0, // 0x0: pushq <bind offset>
+    0xe9, 0, 0, 0, 0, // 0x5: jmp <__stub_helper>
+};
+
 void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                                   uint64_t entryAddr) const {
   memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry));
@@ -222,110 +152,11 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                    in.stubHelper->addr);
 }
 
-void X86_64::prepareSymbolRelocation(lld::macho::Symbol *sym,
-                                     const InputSection *isec, const Reloc &r) {
-  switch (r.type) {
-  case X86_64_RELOC_GOT_LOAD: {
-    if (needsBinding(sym))
-      in.got->addEntry(sym);
-
-    if (sym->isTlv())
-      error("found GOT relocation referencing thread-local variable in " +
-            toString(isec));
-    break;
-  }
-  case X86_64_RELOC_GOT: {
-    in.got->addEntry(sym);
-
-    if (sym->isTlv())
-      error("found GOT relocation referencing thread-local variable in " +
-            toString(isec));
-    break;
-  }
-  case X86_64_RELOC_BRANCH: {
-    prepareBranchTarget(sym);
-    break;
-  }
-  case X86_64_RELOC_UNSIGNED: {
-    if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
-      if (r.length != 3) {
-        error("X86_64_RELOC_UNSIGNED referencing the dynamic symbol " +
-              dysym->getName() + " must have r_length = 3");
-        return;
-      }
-    }
-    // References from thread-local variable sections are treated as offsets
-    // relative to the start of the referent section, and therefore have no
-    // need of rebase opcodes.
-    if (!(isThreadLocalVariables(isec->flags) && isa<Defined>(sym)))
-      addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
-    break;
-  }
-  case X86_64_RELOC_SIGNED:
-  case X86_64_RELOC_SIGNED_1:
-  case X86_64_RELOC_SIGNED_2:
-  case X86_64_RELOC_SIGNED_4:
-    // TODO: warn if they refer to a weak global
-    break;
-  case X86_64_RELOC_TLV: {
-    if (needsBinding(sym))
-      in.tlvPointers->addEntry(sym);
-
-    if (!sym->isTlv())
-      error(
-          "found X86_64_RELOC_TLV referencing a non-thread-local variable in " +
-          toString(isec));
-    break;
-  }
-  case X86_64_RELOC_SUBTRACTOR:
-    fatal("TODO: handle relocation type " + std::to_string(r.type));
-    break;
-  default:
-    llvm_unreachable("unexpected relocation type");
-  }
-}
-
-uint64_t X86_64::resolveSymbolVA(uint8_t *buf, const lld::macho::Symbol &sym,
-                                 uint8_t type) const {
-  switch (type) {
-  case X86_64_RELOC_GOT_LOAD: {
-    if (!sym.isInGot()) {
-      if (buf[-2] != 0x8b)
-        error("X86_64_RELOC_GOT_LOAD must be used with movq instructions");
-      buf[-2] = 0x8d;
-      return sym.getVA();
-    }
-    LLVM_FALLTHROUGH;
-  }
-  case X86_64_RELOC_GOT:
-    return in.got->addr + sym.gotIndex * WordSize;
-  case X86_64_RELOC_BRANCH: {
-    if (sym.isInStubs())
-      return in.stubs->addr + sym.stubsIndex * sizeof(stub);
-    return sym.getVA();
-  }
-  case X86_64_RELOC_UNSIGNED:
-  case X86_64_RELOC_SIGNED:
-  case X86_64_RELOC_SIGNED_1:
-  case X86_64_RELOC_SIGNED_2:
-  case X86_64_RELOC_SIGNED_4:
-    return sym.getVA();
-  case X86_64_RELOC_TLV: {
-    if (sym.isInGot())
-      return in.tlvPointers->addr + sym.gotIndex * WordSize;
-
-    // Convert the movq to a leaq.
-    assert(isa<Defined>(&sym));
-    if (buf[-2] != 0x8b)
-      error("X86_64_RELOC_TLV must be used with movq instructions");
-    buf[-2] = 0x8d;
-    return sym.getVA();
-  }
-  case X86_64_RELOC_SUBTRACTOR:
-    fatal("TODO: handle relocation type " + std::to_string(type));
-  default:
-    llvm_unreachable("Unexpected relocation type");
-  }
+void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
+  // Convert MOVQ to LEAQ
+  if (loc[-2] != 0x8b)
+    error(getRelocAttrs(type).name + " reloc requires MOVQ instruction");
+  loc[-2] = 0x8d;
 }
 
 X86_64::X86_64() {

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 1b337f38f7ba..59ea3025c89e 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -310,11 +310,10 @@ static InputFile *addFile(StringRef path, bool forceLoadArchive) {
     break;
   case file_magic::macho_dynamically_linked_shared_lib:
   case file_magic::macho_dynamically_linked_shared_lib_stub:
-  case file_magic::tapi_file: {
+  case file_magic::tapi_file:
     if (Optional<DylibFile *> dylibFile = loadDylib(mbref))
       newFile = *dylibFile;
     break;
-  }
   case file_magic::bitcode:
     newFile = make<BitcodeFile>(mbref);
     break;
@@ -786,13 +785,11 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     case OPT_INPUT:
       addFile(arg->getValue(), false);
       break;
-    case OPT_weak_library: {
-      auto *dylibFile =
-          dyn_cast_or_null<DylibFile>(addFile(arg->getValue(), false));
-      if (dylibFile)
+    case OPT_weak_library:
+      if (auto *dylibFile =
+              dyn_cast_or_null<DylibFile>(addFile(arg->getValue(), false)))
         dylibFile->forceWeakImport = true;
       break;
-    }
     case OPT_filelist:
       addFileList(arg->getValue());
       break;

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3d4d98b51606..d75a857f5c8d 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -203,6 +203,38 @@ static InputSection *findContainingSubsection(SubsectionMap &map,
   return it->second;
 }
 
+static bool validateRelocationInfo(MemoryBufferRef mb, const section_64 &sec,
+                                   relocation_info rel) {
+  const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
+  bool valid = true;
+  auto message = [relocAttrs, mb, sec, rel, &valid](const Twine &diagnostic) {
+    valid = false;
+    return (relocAttrs.name + " relocation " + diagnostic + " at offset " +
+            std::to_string(rel.r_address) + " of " + sec.segname + "," +
+            sec.sectname + " in " + mb.getBufferIdentifier())
+        .str();
+  };
+
+  if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern)
+    error(message("must be extern"));
+  if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel)
+    error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") +
+                  "be PC-relative"));
+  if (isThreadLocalVariables(sec.flags) &&
+      (!relocAttrs.hasAttr(RelocAttrBits::TLV) ||
+       relocAttrs.hasAttr(RelocAttrBits::LOAD)))
+    error(message("not allowed in thread-local section, must be UNSIGNED"));
+  if (rel.r_length < 2 || rel.r_length > 3 ||
+      !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) {
+    static SmallVector<StringRef, 4> widths{"INVALID", "4", "8", "4 or 8"};
+    error(message("has width " + std::to_string(1 << rel.r_length) +
+                  " bytes, but must be " +
+                  widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] +
+                  " bytes"));
+  }
+  return valid;
+}
+
 void ObjFile::parseRelocations(const section_64 &sec,
                                SubsectionMap &subsecMap) {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
@@ -217,8 +249,8 @@ void ObjFile::parseRelocations(const section_64 &sec,
     //
     // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
     // and the paired *_RELOC_UNSIGNED record holds the minuend. The
-    // datum for each is a symbolic address. The result is the runtime
-    // offset between two addresses.
+    // datum for each is a symbolic address. The result is the offset
+    // between two addresses.
     //
     // The ARM64_RELOC_ADDEND record holds the addend, and the paired
     // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
@@ -235,23 +267,35 @@ void ObjFile::parseRelocations(const section_64 &sec,
     // and insert them. Storing addends in the instruction stream is
     // possible, but inconvenient and more costly at link time.
 
-    relocation_info pairedInfo = relInfos[i];
-    relocation_info relInfo =
-        target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo;
+    uint64_t pairedAddend = 0;
+    relocation_info relInfo = relInfos[i];
+    if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
+      pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
+      relInfo = relInfos[++i];
+    }
     assert(i < relInfos.size());
+    if (!validateRelocationInfo(mb, sec, relInfo))
+      continue;
     if (relInfo.r_address & R_SCATTERED)
       fatal("TODO: Scattered relocations not supported");
-
+    uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
+    assert(!(embeddedAddend && pairedAddend));
+    uint64_t totalAddend = pairedAddend + embeddedAddend;
+
+    Reloc p;
+    if (target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND)) {
+      p.type = relInfo.r_type;
+      p.referent = symbols[relInfo.r_symbolnum];
+      relInfo = relInfos[++i];
+    }
     Reloc r;
     r.type = relInfo.r_type;
     r.pcrel = relInfo.r_pcrel;
     r.length = relInfo.r_length;
     r.offset = relInfo.r_address;
-    // For unpaired relocs, pairdInfo (just a copy of relInfo) is ignored
-    uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo);
     if (relInfo.r_extern) {
       r.referent = symbols[relInfo.r_symbolnum];
-      r.addend = rawAddend;
+      r.addend = totalAddend;
     } else {
       SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
       const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
@@ -263,16 +307,19 @@ void ObjFile::parseRelocations(const section_64 &sec,
         // TODO: The offset of 4 is probably not right for ARM64, nor for
         //       relocations with r_length != 2.
         referentOffset =
-            sec.addr + relInfo.r_address + 4 + rawAddend - referentSec.addr;
+            sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr;
       } else {
         // The addend for a non-pcrel relocation is its absolute address.
-        referentOffset = rawAddend - referentSec.addr;
+        referentOffset = totalAddend - referentSec.addr;
       }
       r.referent = findContainingSubsection(referentSubsecMap, &referentOffset);
       r.addend = referentOffset;
     }
 
     InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
+    if (p.type != GENERIC_RELOC_INVALID &&
+        target->hasAttr(p.type, RelocAttrBits::SUBTRAHEND))
+      subsec->relocs.push_back(p);
     subsec->relocs.push_back(r);
   }
 }

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 9287d49dae3c..a73c57d9377d 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -10,6 +10,7 @@
 #include "InputFiles.h"
 #include "OutputSegment.h"
 #include "Symbols.h"
+#include "SyntheticSections.h"
 #include "Target.h"
 #include "Writer.h"
 #include "lld/Common/Memory.h"
@@ -33,17 +34,46 @@ uint64_t InputSection::getFileSize() const {
 
 uint64_t InputSection::getVA() const { return parent->addr + outSecOff; }
 
+static uint64_t resolveSymbolVA(uint8_t *loc, const lld::macho::Symbol &sym,
+                                uint8_t type) {
+  const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(type);
+  if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
+    if (sym.isInStubs())
+      return in.stubs->addr + sym.stubsIndex * target->stubSize;
+  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) {
+    if (sym.isInGot())
+      return in.got->addr + sym.gotIndex * WordSize;
+  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
+    return in.got->addr + sym.gotIndex * WordSize;
+  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) {
+    if (sym.isInGot())
+      return in.tlvPointers->addr + sym.gotIndex * WordSize;
+    assert(isa<Defined>(&sym));
+  }
+  return sym.getVA();
+}
+
 void InputSection::writeTo(uint8_t *buf) {
   if (getFileSize() == 0)
     return;
 
   memcpy(buf, data.data(), data.size());
 
-  for (Reloc &r : relocs) {
+  for (size_t i = 0; i < relocs.size(); i++) {
+    const Reloc &r = relocs[i];
+    uint8_t *loc = buf + r.offset;
+    auto *fromSym = target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)
+                        ? relocs[i++].referent.dyn_cast<Symbol *>()
+                        : nullptr;
     uint64_t referentVA = 0;
-    if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
-      referentVA =
-          target->resolveSymbolVA(buf + r.offset, *referentSym, r.type);
+    if (fromSym) {
+      auto *toSym = r.referent.dyn_cast<Symbol *>();
+      referentVA = toSym->getVA() - fromSym->getVA();
+    } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
+      if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&
+          !referentSym->isInGot())
+        target->relaxGotLoad(loc, r.type);
+      referentVA = resolveSymbolVA(loc, *referentSym, r.type);
 
       if (isThreadLocalVariables(flags)) {
         // References from thread-local variable sections are treated as offsets
@@ -56,11 +86,7 @@ void InputSection::writeTo(uint8_t *buf) {
     } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
       referentVA = referentIsec->getVA();
     }
-
-    uint64_t referentVal = referentVA + r.addend;
-    if (r.pcrel)
-      referentVal -= getVA() + r.offset;
-    target->relocateOne(buf + r.offset, r, referentVal);
+    target->relocateOne(loc, r, referentVA, getVA() + r.offset);
   }
 }
 

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 00b523fb8d46..57c2c4fd885a 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -23,16 +23,16 @@ class OutputSection;
 class Symbol;
 
 struct Reloc {
-  uint8_t type;
-  bool pcrel;
-  uint8_t length;
+  uint8_t type = llvm::MachO::GENERIC_RELOC_INVALID;
+  bool pcrel = false;
+  uint8_t length = 0;
   // The offset from the start of the subsection that this relocation belongs
   // to.
-  uint32_t offset;
+  uint32_t offset = 0;
   // Adding this offset to the address of the referent symbol or subsection
   // gives the destination that this relocation refers to.
-  uint64_t addend;
-  llvm::PointerUnion<Symbol *, InputSection *> referent;
+  uint64_t addend = 0;
+  llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
 };
 
 class InputSection {

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 3f5413696d4b..a5696fc9d390 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -380,9 +380,7 @@ void macho::addNonLazyBindingEntries(const Symbol *sym,
     in.rebase->addEntry(section, offset);
     if (defined->isExternalWeakDef())
       in.weakBinding->addEntry(sym, section, offset, addend);
-  } else if (isa<DSOHandle>(sym)) {
-    error("cannot bind to " + DSOHandle::name);
-  } else {
+  } else if (!isa<DSOHandle>(sym)) {
     // Undefined symbols are filtered out in scanRelocations(); we should never
     // get here
     llvm_unreachable("cannot bind to an undefined symbol");

diff  --git a/lld/MachO/Target.cpp b/lld/MachO/Target.cpp
index 0f70776a507f..293eb7d6f164 100644
--- a/lld/MachO/Target.cpp
+++ b/lld/MachO/Target.cpp
@@ -7,8 +7,42 @@
 //===----------------------------------------------------------------------===//
 
 #include "Target.h"
+#include "InputSection.h"
+#include "Symbols.h"
+#include "SyntheticSections.h"
 
+#include "lld/Common/ErrorHandler.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
 using namespace lld;
 using namespace lld::macho;
 
+const TargetInfo::RelocAttrs TargetInfo::invalidRelocAttrs{"INVALID",
+                                                           RelocAttrBits::_0};
+
+bool TargetInfo::validateSymbolRelocation(const Symbol *sym,
+                                          const InputSection *isec,
+                                          const Reloc &r) {
+  const RelocAttrs &relocAttrs = getRelocAttrs(r.type);
+  bool valid = true;
+  auto message = [relocAttrs, sym, isec, &valid](const Twine &diagnostic) {
+    valid = false;
+    return (relocAttrs.name + " relocation " + diagnostic + " for `" +
+            sym->getName() + "' in " + toString(isec))
+        .str();
+  };
+
+  if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD) !=
+      sym->isTlv())
+    error(message(Twine("requires that variable ") +
+                  (sym->isTlv() ? "not " : "") + "be thread-local"));
+  if (relocAttrs.hasAttr(RelocAttrBits::DYSYM8) && isa<DylibSymbol>(sym) &&
+      r.length != 3)
+    error(message("has width " + std::to_string(1 << r.length) +
+                  " bytes, but must be 8 bytes"));
+
+  return valid;
+}
+
 TargetInfo *macho::target = nullptr;

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 8537803160e6..44035b85571d 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -9,6 +9,7 @@
 #ifndef LLD_MACHO_TARGET_H
 #define LLD_MACHO_TARGET_H
 
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
 
@@ -17,6 +18,7 @@
 
 namespace lld {
 namespace macho {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
 
 class Symbol;
 class DylibSymbol;
@@ -32,17 +34,41 @@ enum : uint64_t {
   MaxAlignmentPowerOf2 = 32,
 };
 
+enum class RelocAttrBits {
+  _0 = 0,              // invalid
+  PCREL = 1 << 0,      // Value is PC-relative offset
+  ABSOLUTE = 1 << 1,   // Value is an absolute address or fixed offset
+  BYTE4 = 1 << 2,      // 4 byte datum
+  BYTE8 = 1 << 3,      // 8 byte datum
+  EXTERN = 1 << 4,     // Can have an external symbol
+  LOCAL = 1 << 5,      // Can have a local symbol
+  ADDEND = 1 << 6,     // *_ADDEND paired prefix reloc
+  SUBTRAHEND = 1 << 7, // *_SUBTRACTOR paired prefix reloc
+  BRANCH = 1 << 8,     // Value is branch target
+  GOT = 1 << 9,        // Pertains to Global Offset Table slots
+  TLV = 1 << 10,       // Pertains to Thread-Local Variable slots
+  DYSYM8 = 1 << 11,    // Requires DySym width to be 8 bytes
+  LOAD = 1 << 12,      // Relaxable indirect load
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ LOAD),
+};
+
 class TargetInfo {
 public:
+  struct RelocAttrs {
+    llvm::StringRef name;
+    RelocAttrBits bits;
+    bool hasAttr(RelocAttrBits b) const { return (bits & b) == b; }
+  };
+  static const RelocAttrs invalidRelocAttrs;
+
   virtual ~TargetInfo() = default;
 
   // Validate the relocation structure and get its addend.
-  virtual uint64_t getAddend(llvm::MemoryBufferRef,
-                             const llvm::MachO::section_64 &,
-                             llvm::MachO::relocation_info,
-                             llvm::MachO::relocation_info) const = 0;
-  virtual bool isPairedReloc(llvm::MachO::relocation_info) const = 0;
-  virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0;
+  virtual uint64_t
+  getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
+                    const llvm::MachO::relocation_info) const = 0;
+  virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
+                           uint64_t pc) const = 0;
 
   // Write code for lazy binding. See the comments on StubsSection for more
   // details.
@@ -56,10 +82,20 @@ class TargetInfo {
   // GOT/stubs entries, and resolveSymbolVA() will return the addresses of those
   // entries. resolveSymbolVA() may also relax the target instructions to save
   // on a level of address indirection.
-  virtual void prepareSymbolRelocation(Symbol *, const InputSection *,
-                                       const Reloc &) = 0;
-  virtual uint64_t resolveSymbolVA(uint8_t *buf, const Symbol &,
-                                   uint8_t type) const = 0;
+  virtual void relaxGotLoad(uint8_t *loc, uint8_t type) const = 0;
+
+  virtual const RelocAttrs &getRelocAttrs(uint8_t type) const = 0;
+
+  bool hasAttr(uint8_t type, RelocAttrBits bit) const {
+    return getRelocAttrs(type).hasAttr(bit);
+  }
+
+  bool validateRelocationInfo(llvm::MemoryBufferRef,
+                              const llvm::MachO::section_64 &sec,
+                              llvm::MachO::relocation_info);
+  bool validateSymbolRelocation(const Symbol *, const InputSection *isec,
+                                const Reloc &);
+  void prepareSymbolRelocation(Symbol *, const InputSection *, const Reloc &);
 
   uint32_t cpuType;
   uint32_t cpuSubtype;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index bdc3609e033d..81df8dece173 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -402,6 +402,29 @@ class LCUuid : public LoadCommand {
 
 } // namespace
 
+static void prepareSymbolRelocation(lld::macho::Symbol *sym,
+                                    const InputSection *isec, const Reloc &r) {
+  const TargetInfo::RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
+
+  if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
+    prepareBranchTarget(sym);
+  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT | RelocAttrBits::LOAD)) {
+    if (needsBinding(sym))
+      in.got->addEntry(sym);
+  } else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
+    in.got->addEntry(sym);
+  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV | RelocAttrBits::LOAD)) {
+    if (needsBinding(sym))
+      in.tlvPointers->addEntry(sym);
+  } else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
+    // References from thread-local variable sections are treated as offsets
+    // relative to the start of the referent section, and therefore have no
+    // need of rebase opcodes.
+    if (!(isThreadLocalVariables(isec->flags) && isa<Defined>(sym)))
+      addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
+  }
+}
+
 void Writer::scanRelocations() {
   for (InputSection *isec : inputSections) {
     // We do not wish to add rebase opcodes for __LD,__compact_unwind, because
@@ -409,13 +432,17 @@ void Writer::scanRelocations() {
     // before Writer runs might be cleaner...
     if (isec->segname == segment_names::ld)
       continue;
+    if (isec->name == section_names::ehFrame)
+      continue;
 
     for (Reloc &r : isec->relocs) {
-      if (auto *s = r.referent.dyn_cast<lld::macho::Symbol *>()) {
-        if (isa<Undefined>(s))
-          treatUndefinedSymbol(toString(*s), toString(isec->file));
-        else
-          target->prepareSymbolRelocation(s, isec, r);
+      if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND))
+        continue;
+      if (auto *sym = r.referent.dyn_cast<lld::macho::Symbol *>()) {
+        if (isa<Undefined>(sym))
+          treatUndefinedSymbol(toString(*sym), toString(isec->file));
+        else if (target->validateSymbolRelocation(sym, isec, r))
+          prepareSymbolRelocation(sym, isec, r);
       } else {
         assert(r.referent.is<InputSection *>());
         if (!r.pcrel)
@@ -614,7 +641,7 @@ static void sortSegmentsAndSections() {
   uint32_t sectionIndex = 0;
   for (OutputSegment *seg : outputSegments) {
     seg->sortOutputSections(compareByOrder<OutputSection *>(sectionOrder));
-    for (auto *osec : seg->getSections()) {
+    for (OutputSection *osec : seg->getSections()) {
       // Now that the output sections are sorted, assign the final
       // output section indices.
       if (!osec->isHidden())
@@ -693,7 +720,7 @@ void Writer::assignAddresses(OutputSegment *seg) {
   fileOff = alignTo(fileOff, PageSize);
   seg->fileOff = fileOff;
 
-  for (auto *osec : seg->getSections()) {
+  for (OutputSection *osec : seg->getSections()) {
     if (!osec->isNeeded())
       continue;
     addr = alignTo(addr, osec->align);

diff  --git a/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s b/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s
index 4a0be8549613..6824282045fc 100644
--- a/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s
+++ b/lld/test/MachO/invalid/bad-got-to-dylib-tlv-reference.s
@@ -8,7 +8,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
 # RUN: not %lld -lSystem -L%t -ltlv -o /dev/null %t/test.o 2>&1 | FileCheck %s -DFILE=%t/test.o
 
-# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text)
+# CHECK: error: GOT_LOAD relocation requires that variable not be thread-local for `_foo' in [[FILE]]:(__text)
 
 #--- libtlv.s
 .section	__DATA,__thread_vars,thread_local_variables

diff  --git a/lld/test/MachO/invalid/bad-got-to-tlv-reference.s b/lld/test/MachO/invalid/bad-got-to-tlv-reference.s
index b9923799d66f..815a3de0df5b 100644
--- a/lld/test/MachO/invalid/bad-got-to-tlv-reference.s
+++ b/lld/test/MachO/invalid/bad-got-to-tlv-reference.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
 # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o
 
-# CHECK: error: found GOT relocation referencing thread-local variable in [[FILE]]:(__text)
+# CHECK: error: GOT_LOAD relocation requires that variable not be thread-local for `_foo' in [[FILE]]:(__text)
 
 .text
 .globl _main

diff  --git a/lld/test/MachO/invalid/bad-tlv-def.s b/lld/test/MachO/invalid/bad-tlv-def.s
index a1ef0110c240..4806e429579f 100644
--- a/lld/test/MachO/invalid/bad-tlv-def.s
+++ b/lld/test/MachO/invalid/bad-tlv-def.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
 # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s
 
-# CHECK: error: relocations in thread-local variable sections must be X86_64_RELOC_UNSIGNED
+# CHECK: error: GOT_LOAD relocation not allowed in thread-local section, must be UNSIGNED
 
 .text
 .globl _main

diff  --git a/lld/test/MachO/invalid/bad-tlv-opcode.s b/lld/test/MachO/invalid/bad-tlv-opcode.s
index c881a9792f2a..28d03048bf47 100644
--- a/lld/test/MachO/invalid/bad-tlv-opcode.s
+++ b/lld/test/MachO/invalid/bad-tlv-opcode.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
 # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s
 
-# CHECK: error: X86_64_RELOC_TLV must be used with movq instructions
+# CHECK: error: TLV reloc requires MOVQ instruction
 
 .text
 .globl _main

diff  --git a/lld/test/MachO/invalid/bad-tlv-relocation.s b/lld/test/MachO/invalid/bad-tlv-relocation.s
index f6c857b5497b..6c3489d0d0ce 100644
--- a/lld/test/MachO/invalid/bad-tlv-relocation.s
+++ b/lld/test/MachO/invalid/bad-tlv-relocation.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
 # RUN: not %lld -o /dev/null %t.o 2>&1 | FileCheck %s -DFILE=%t.o
 
-# CHECK: error: found X86_64_RELOC_TLV referencing a non-thread-local variable in [[FILE]]:(__text)
+# CHECK: TLV relocation requires that variable be thread-local for `_foo' in [[FILE]]:(__text)
 
 .text
 .globl _main

diff  --git a/lld/test/MachO/invalid/invalid-relocation-length.yaml b/lld/test/MachO/invalid/invalid-relocation-length.yaml
index e8cfbec182ed..ff8759b41747 100644
--- a/lld/test/MachO/invalid/invalid-relocation-length.yaml
+++ b/lld/test/MachO/invalid/invalid-relocation-length.yaml
@@ -2,7 +2,7 @@
 # RUN: yaml2obj %s -o %t.o
 # RUN: not %lld -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o
 #
-# CHECK: error: invalid relocation at offset 1 of __TEXT,__text in [[FILE]]: relocations of type 0 must have r_length of 2 or 3
+# CHECK: error: UNSIGNED relocation has width 2 bytes, but must be 4 or 8 bytes at offset 1 of __TEXT,__text in [[FILE]]
 
 !mach-o
 FileHeader:

diff  --git a/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml b/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml
index 3fa2f1f7db8b..cd95b1a07d07 100644
--- a/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml
+++ b/lld/test/MachO/invalid/invalid-relocation-pcrel.yaml
@@ -2,7 +2,7 @@
 # RUN: yaml2obj %s -o %t.o
 # RUN: not %lld -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o
 #
-# CHECK: error: invalid relocation at offset 1 of __TEXT,__text in [[FILE]]: relocations of type 0 must not be pcrel
+# CHECK: error: UNSIGNED relocation must not be PC-relative at offset 1 of __TEXT,__text in [[FILE]]
 
 !mach-o
 FileHeader:

diff  --git a/lld/test/MachO/x86-64-reloc-subtract.s b/lld/test/MachO/x86-64-reloc-subtract.s
new file mode 100644
index 000000000000..6b16d0ee41a5
--- /dev/null
+++ b/lld/test/MachO/x86-64-reloc-subtract.s
@@ -0,0 +1,33 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: %lld -o %t %t.o
+# RUN: llvm-objdump --syms --full-contents %t | FileCheck %s
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK: {{0*}}[[#%x, SUB1ADDR:]] g {{.*}} __DATA,subby _sub1
+# CHECK: {{0*}}[[#%x, SUB2ADDR:]] g {{.*}} __DATA,subby _sub2
+# CHECK-LABEL: Contents of section __DATA,subby:
+# CHECK: [[#SUB1ADDR]] 10000000
+# CHECK: [[#SUB2ADDR]] f0ffffff
+
+.globl _main, _sub1, _sub2
+
+.section __DATA,subby
+L_.subtrahend_1:
+  .space 16
+L_.minuend_1:
+  .space 16
+L_.minuend_2:
+  .space 16
+L_.subtrahend_2:
+  .space 16
+_sub1:
+  .long L_.minuend_1 - L_.subtrahend_1
+  .space 12
+_sub2:
+  .long L_.minuend_2 - L_.subtrahend_2
+
+.text
+_main:
+  mov $0, %rax
+  ret

diff  --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index f5d5ec328b5e..1ec65363f89b 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -399,6 +399,7 @@ enum RelocationInfoType {
   // Constant values for the r_type field in an
   // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
   // structure.
+  GENERIC_RELOC_INVALID = 0xff,
   GENERIC_RELOC_VANILLA = 0,
   GENERIC_RELOC_PAIR = 1,
   GENERIC_RELOC_SECTDIFF = 2,


        


More information about the llvm-commits mailing list