[lld] d4ec334 - [lld-macho][nfc] Refactor to accommodate paired relocs

Greg McGary via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 17 20:22:26 PST 2020


Author: Greg McGary
Date: 2020-12-17T20:21:41-08:00
New Revision: d4ec3346b1baf31819d20a8950ced8be8f66a408

URL: https://github.com/llvm/llvm-project/commit/d4ec3346b1baf31819d20a8950ced8be8f66a408
DIFF: https://github.com/llvm/llvm-project/commit/d4ec3346b1baf31819d20a8950ced8be8f66a408.diff

LOG: [lld-macho][nfc] Refactor to accommodate paired relocs

This is a refactor to pave the way for supporting paired-ADDEND for ARM64. The only paired reloc type for X86_64 is SUBTRACTOR. In a later diff, I will add SUBTRACTOR for both X86_64 and ARM64.

* s/`getImplicitAddend`/`getAddend`/ because it handles all forms of addend: implicit, explicit, paired.
* add predicate `bool isPairedReloc()`
* the range check on `relInfo.r_symbolnum` is internal and unrelated to user input, so use `assert()` rather than `error()`
* minor cleanups & rearrangements in `InputFile::parseRelocations()`

Differential Revision: https://reviews.llvm.org/D90614

Added: 
    

Modified: 
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/Target.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index c776e21d6f5f..729ef603adb7 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -25,8 +25,9 @@ namespace {
 struct X86_64 : TargetInfo {
   X86_64();
 
-  uint64_t getImplicitAddend(MemoryBufferRef, const section_64 &,
-                             const relocation_info &) const override;
+  bool isPairedReloc(relocation_info) const override;
+  uint64_t getAddend(MemoryBufferRef, const section_64 &, relocation_info,
+                     relocation_info) const override;
   void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const override;
 
   void writeStub(uint8_t *buf, const macho::Symbol &) const override;
@@ -43,7 +44,7 @@ struct X86_64 : TargetInfo {
 } // namespace
 
 static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec,
-                                    const relocation_info &rel) {
+                                    relocation_info rel) {
   return ("invalid relocation at offset " + std::to_string(rel.r_address) +
           " of " + sec.segname + "," + sec.sectname + " in " +
           mb.getBufferIdentifier())
@@ -51,7 +52,7 @@ static std::string getErrorLocation(MemoryBufferRef mb, const section_64 &sec,
 }
 
 static void validateLength(MemoryBufferRef mb, const section_64 &sec,
-                           const relocation_info &rel,
+                           relocation_info rel,
                            ArrayRef<uint8_t> validLengths) {
   if (find(validLengths, rel.r_length) != validLengths.end())
     return;
@@ -68,8 +69,13 @@ static void validateLength(MemoryBufferRef mb, const section_64 &sec,
   fatal(msg);
 }
 
-uint64_t X86_64::getImplicitAddend(MemoryBufferRef mb, const section_64 &sec,
-                                   const relocation_info &rel) const {
+bool X86_64::isPairedReloc(relocation_info rel) const {
+  return rel.r_type == X86_64_RELOC_SUBTRACTOR;
+}
+
+uint64_t X86_64::getAddend(MemoryBufferRef mb, const section_64 &sec,
+                           relocation_info rel,
+                           relocation_info pairedRel) const {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
 
@@ -139,7 +145,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t val) const {
     break;
   default:
     llvm_unreachable(
-        "getImplicitAddend should have flagged all unhandled relocation types");
+        "getAddend should have flagged all unhandled relocation types");
   }
 
   switch (r.length) {

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index ce66c9650446..3a4466dd123a 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -206,31 +206,53 @@ static InputSection *findContainingSubsection(SubsectionMap &map,
 void ObjFile::parseRelocations(const section_64 &sec,
                                SubsectionMap &subsecMap) {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
-  ArrayRef<any_relocation_info> anyRelInfos(
-      reinterpret_cast<const any_relocation_info *>(buf + sec.reloff),
-      sec.nreloc);
-
-  for (const any_relocation_info &anyRelInfo : anyRelInfos) {
-    if (anyRelInfo.r_word0 & R_SCATTERED)
+  ArrayRef<relocation_info> relInfos(
+      reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
+
+  for (size_t i = 0; i < relInfos.size(); i++) {
+    // Paired relocations serve as Mach-O's method for attaching a
+    // supplemental datum to a primary relocation record. ELF does not
+    // need them because the *_RELOC_RELA records contain the extra
+    // addend field, vs. *_RELOC_REL which omit the addend.
+    //
+    // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend,
+    // and the paired *_RELOC_UNSIGNED record holds the minuend. The
+    // datum for each is a symbolic address. The result is the runtime
+    // offset between two addresses.
+    //
+    // The ARM64_RELOC_ADDEND record holds the addend, and the paired
+    // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the
+    // base symbolic address.
+    //
+    // Note: X86 does not use *_RELOC_ADDEND because it can embed an
+    // addend into the instruction stream. On X86, a relocatable address
+    // field always occupies an entire contiguous sequence of byte(s),
+    // so there is no need to merge opcode bits with address
+    // bits. Therefore, it's easy and convenient to store addends in the
+    // instruction-stream bytes that would otherwise contain zeroes. By
+    // contrast, RISC ISAs such as ARM64 mix opcode bits with
+    // address bits so that bitwise arithmetic is necessary to extract
+    // and insert them. Storing addends in the instruction stream is
+    // possible, but inconvenient and more costly at link time.
+
+    relocation_info pairedInfo = relInfos[i];
+    relocation_info relInfo =
+        target->isPairedReloc(pairedInfo) ? relInfos[++i] : pairedInfo;
+    assert(i < relInfos.size());
+    if (relInfo.r_address & R_SCATTERED)
       fatal("TODO: Scattered relocations not supported");
 
-    auto relInfo = reinterpret_cast<const relocation_info &>(anyRelInfo);
-
     Reloc r;
     r.type = relInfo.r_type;
     r.pcrel = relInfo.r_pcrel;
     r.length = relInfo.r_length;
-    uint64_t rawAddend = target->getImplicitAddend(mb, sec, relInfo);
-
+    r.offset = relInfo.r_address;
+    // For unpaired relocs, pairedInfo (just a copy of relInfo) is ignored
+    uint64_t rawAddend = target->getAddend(mb, sec, relInfo, pairedInfo);
     if (relInfo.r_extern) {
       r.referent = symbols[relInfo.r_symbolnum];
       r.addend = rawAddend;
     } else {
-      if (relInfo.r_symbolnum == 0 || relInfo.r_symbolnum > subsections.size())
-        fatal("invalid section index in relocation for offset " +
-              std::to_string(r.offset) + " in section " + sec.sectname +
-              " of " + getName());
-
       SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
       const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
       uint32_t referentOffset;
@@ -250,7 +272,6 @@ void ObjFile::parseRelocations(const section_64 &sec,
       r.addend = referentOffset;
     }
 
-    r.offset = relInfo.r_address;
     InputSection *subsec = findContainingSubsection(subsecMap, &r.offset);
     subsec->relocs.push_back(r);
   }

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index d80da011e286..8537803160e6 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -37,9 +37,11 @@ class TargetInfo {
   virtual ~TargetInfo() = default;
 
   // Validate the relocation structure and get its addend.
-  virtual uint64_t
-  getImplicitAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
-                    const llvm::MachO::relocation_info &) const = 0;
+  virtual uint64_t getAddend(llvm::MemoryBufferRef,
+                             const llvm::MachO::section_64 &,
+                             llvm::MachO::relocation_info,
+                             llvm::MachO::relocation_info) const = 0;
+  virtual bool isPairedReloc(llvm::MachO::relocation_info) const = 0;
   virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t val) const = 0;
 
   // Write code for lazy binding. See the comments on StubsSection for more


        


More information about the llvm-commits mailing list