[lld] dc8bee9 - [lld-macho] Check address ranges when applying relocations

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 12 14:26:41 PST 2021


Author: Jez Ng
Date: 2021-03-12T17:26:27-05:00
New Revision: dc8bee92658e4ac2314aa2f59123078d49463219

URL: https://github.com/llvm/llvm-project/commit/dc8bee92658e4ac2314aa2f59123078d49463219
DIFF: https://github.com/llvm/llvm-project/commit/dc8bee92658e4ac2314aa2f59123078d49463219.diff

LOG: [lld-macho] Check address ranges when applying relocations

This diff required fixing `getEmbeddedAddend` to apply sign
extension to 32-bit values. We were previously passing around wrong
64-bit addend values that became "right" after being truncated back to
32-bit.

I've also made `getEmbeddedAddend` return a signed int, which is similar
to what LLD-ELF does for its `getImplicitAddend`.

`reportRangeError`, `checkUInt`, and `checkInt` are counterparts of similar
functions in LLD-ELF.

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D98387

Added: 
    lld/test/MachO/invalid/range-check.s

Modified: 
    lld/MachO/Arch/ARM64.cpp
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/Relocations.cpp
    lld/MachO/Relocations.h
    lld/MachO/Target.h

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp
index 0d1e2f447adc..14fcee290d12 100644
--- a/lld/MachO/Arch/ARM64.cpp
+++ b/lld/MachO/Arch/ARM64.cpp
@@ -28,8 +28,8 @@ namespace {
 struct ARM64 : TargetInfo {
   ARM64();
 
-  uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
-                             const relocation_info) const override;
+  int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
+                            const relocation_info) const override;
   void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
                    uint64_t pc) const override;
 
@@ -77,8 +77,8 @@ const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
   return relocAttrsArray[type];
 }
 
-uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
-                                  const relocation_info rel) const {
+int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
+                                 const relocation_info rel) const {
   if (rel.r_type != ARM64_RELOC_UNSIGNED &&
       rel.r_type != ARM64_RELOC_SUBTRACTOR) {
     // All other reloc types should use the ADDEND relocation to store their
@@ -91,7 +91,7 @@ uint64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
   const uint8_t *loc = buf + sec.offset + rel.r_address;
   switch (rel.r_length) {
   case 2:
-    return read32le(loc);
+    return static_cast<int32_t>(read32le(loc));
   case 3:
     return read64le(loc);
   default:
@@ -108,18 +108,30 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
 // |           |                       imm26                       |
 // +-----------+---------------------------------------------------+
 
-inline uint64_t encodeBranch26(uint64_t base, uint64_t va) {
+inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
+  checkInt(r, va, 28);
   // Since branch destinations are 4-byte aligned, the 2 least-
   // significant bits are 0. They are right shifted off the end.
   return (base | bitField(va, 2, 26, 0));
 }
 
+inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
+  checkInt(d, va, 28);
+  return (base | bitField(va, 2, 26, 0));
+}
+
 //   30 29          23                                  5
 // +-+---+---------+-------------------------------------+---------+
 // | |ilo|         |                immhi                |         |
 // +-+---+---------+-------------------------------------+---------+
 
-inline uint64_t encodePage21(uint64_t base, uint64_t va) {
+inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
+  checkInt(r, va, 35);
+  return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+}
+
+inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
+  checkInt(d, va, 35);
   return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
 }
 
@@ -158,21 +170,25 @@ void ARM64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
   value += r.addend;
   switch (r.type) {
   case ARM64_RELOC_BRANCH26:
-    value = encodeBranch26(base, value - pc);
+    value = encodeBranch26(r, base, value - pc);
     break;
   case ARM64_RELOC_SUBTRACTOR:
   case ARM64_RELOC_UNSIGNED:
+    if (r.length == 2)
+      checkInt(r, value, 32);
     break;
   case ARM64_RELOC_POINTER_TO_GOT:
     if (r.pcrel)
       value -= pc;
+    checkInt(r, value, 32);
     break;
   case ARM64_RELOC_PAGE21:
   case ARM64_RELOC_GOT_LOAD_PAGE21:
-  case ARM64_RELOC_TLVP_LOAD_PAGE21:
+  case ARM64_RELOC_TLVP_LOAD_PAGE21: {
     assert(r.pcrel);
-    value = encodePage21(base, pageBits(value) - pageBits(pc));
+    value = encodePage21(r, base, pageBits(value) - pageBits(pc));
     break;
+  }
   case ARM64_RELOC_PAGEOFF12:
   case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
   case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
@@ -206,7 +222,8 @@ void ARM64::writeStub(uint8_t *buf8, const macho::Symbol &sym) const {
   uint64_t pcPageBits =
       pageBits(in.stubs->addr + sym.stubsIndex * sizeof(stubCode));
   uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * WordSize;
-  buf32[0] = encodePage21(stubCode[0], pageBits(lazyPointerVA) - pcPageBits);
+  buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
+                          pageBits(lazyPointerVA) - pcPageBits);
   buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
   buf32[2] = stubCode[2];
 }
@@ -226,14 +243,15 @@ void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
     return pageBits(in.stubHelper->addr + i * sizeof(uint32_t));
   };
   uint64_t loaderVA = in.imageLoaderCache->getVA();
-  buf32[0] =
-      encodePage21(stubHelperHeaderCode[0], pageBits(loaderVA) - pcPageBits(0));
+  SymbolDiagnostic d = {nullptr, "stub header helper"};
+  buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
+                          pageBits(loaderVA) - pcPageBits(0));
   buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
   buf32[2] = stubHelperHeaderCode[2];
   uint64_t binderVA =
       in.got->addr + in.stubHelper->stubBinder->gotIndex * WordSize;
-  buf32[3] =
-      encodePage21(stubHelperHeaderCode[3], pageBits(binderVA) - pcPageBits(3));
+  buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
+                          pageBits(binderVA) - pcPageBits(3));
   buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
   buf32[5] = stubHelperHeaderCode[5];
 }
@@ -250,8 +268,8 @@ void ARM64::writeStubHelperEntry(uint8_t *buf8, const DylibSymbol &sym,
   auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
   uint64_t stubHelperHeaderVA = in.stubHelper->addr;
   buf32[0] = stubHelperEntryCode[0];
-  buf32[1] =
-      encodeBranch26(stubHelperEntryCode[1], stubHelperHeaderVA - pcVA(1));
+  buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
+                            stubHelperHeaderVA - pcVA(1));
   buf32[2] = sym.lazyBindOffset;
 }
 

diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 7a7ad0a7f0fc..d4bb841a571f 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -25,8 +25,8 @@ namespace {
 struct X86_64 : TargetInfo {
   X86_64();
 
-  uint64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
-                             const relocation_info) const override;
+  int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
+                            const relocation_info) const override;
   void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
                    uint64_t relocVA) const override;
 
@@ -77,14 +77,14 @@ static int pcrelOffset(uint8_t type) {
   }
 }
 
-uint64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
-                                   relocation_info rel) const {
+int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
+                                  relocation_info rel) const {
   auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
   const uint8_t *loc = buf + sec.offset + rel.r_address;
 
   switch (rel.r_length) {
   case 2:
-    return read32le(loc) + pcrelOffset(rel.r_type);
+    return static_cast<int32_t>(read32le(loc)) + pcrelOffset(rel.r_type);
   case 3:
     return read64le(loc) + pcrelOffset(rel.r_type);
   default:
@@ -102,6 +102,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
 
   switch (r.length) {
   case 2:
+    if (r.type == X86_64_RELOC_UNSIGNED)
+      checkUInt(r, value, 32);
+    else
+      checkInt(r, value, 32);
     write32le(loc, value);
     break;
   case 3:
@@ -121,9 +125,10 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
 // bufAddr:  The virtual address corresponding to buf[0].
 // bufOff:   The offset within buf of the next instruction.
 // destAddr: The destination address that the current instruction references.
-static void writeRipRelative(uint8_t *buf, uint64_t bufAddr, uint64_t bufOff,
-                             uint64_t destAddr) {
+static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
+                             uint64_t bufOff, uint64_t destAddr) {
   uint64_t rip = bufAddr + bufOff;
+  checkInt(d, destAddr - rip, 32);
   // For the instructions we care about, the RIP-relative address is always
   // stored in the last 4 bytes of the instruction.
   write32le(buf + bufOff - 4, destAddr - rip);
@@ -136,7 +141,7 @@ static constexpr uint8_t stub[] = {
 void X86_64::writeStub(uint8_t *buf, const macho::Symbol &sym) const {
   memcpy(buf, stub, 2); // just copy the two nonzero bytes
   uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
-  writeRipRelative(buf, stubAddr, sizeof(stub),
+  writeRipRelative({&sym, "stub"}, buf, stubAddr, sizeof(stub),
                    in.lazyPointers->addr + sym.stubsIndex * WordSize);
 }
 
@@ -149,8 +154,10 @@ static constexpr uint8_t stubHelperHeader[] = {
 
 void X86_64::writeStubHelperHeader(uint8_t *buf) const {
   memcpy(buf, stubHelperHeader, sizeof(stubHelperHeader));
-  writeRipRelative(buf, in.stubHelper->addr, 7, in.imageLoaderCache->getVA());
-  writeRipRelative(buf, in.stubHelper->addr, 0xf,
+  SymbolDiagnostic d = {nullptr, "stub helper header"};
+  writeRipRelative(d, buf, in.stubHelper->addr, 7,
+                   in.imageLoaderCache->getVA());
+  writeRipRelative(d, buf, in.stubHelper->addr, 0xf,
                    in.got->addr +
                        in.stubHelper->stubBinder->gotIndex * WordSize);
 }
@@ -164,8 +171,8 @@ void X86_64::writeStubHelperEntry(uint8_t *buf, const DylibSymbol &sym,
                                   uint64_t entryAddr) const {
   memcpy(buf, stubHelperEntry, sizeof(stubHelperEntry));
   write32le(buf + 1, sym.lazyBindOffset);
-  writeRipRelative(buf, entryAddr, sizeof(stubHelperEntry),
-                   in.stubHelper->addr);
+  writeRipRelative({&sym, "stub helper"}, buf, entryAddr,
+                   sizeof(stubHelperEntry), in.stubHelper->addr);
 }
 
 void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 32ef5b46730a..4ef56eaccde4 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -264,7 +264,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
     // and insert them. Storing addends in the instruction stream is
     // possible, but inconvenient and more costly at link time.
 
-    uint64_t pairedAddend = 0;
+    int64_t pairedAddend = 0;
     relocation_info relInfo = relInfos[i];
     if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
       pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
@@ -276,9 +276,9 @@ void ObjFile::parseRelocations(const section_64 &sec,
     if (relInfo.r_address & R_SCATTERED)
       fatal("TODO: Scattered relocations not supported");
 
-    uint64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
+    int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
     assert(!(embeddedAddend && pairedAddend));
-    uint64_t totalAddend = pairedAddend + embeddedAddend;
+    int64_t totalAddend = pairedAddend + embeddedAddend;
     Reloc r;
     r.type = relInfo.r_type;
     r.pcrel = relInfo.r_pcrel;

diff  --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index a60dbb21ff9e..e3b4cea5123b 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -39,4 +39,24 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
   return valid;
 }
 
+void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
+                             int64_t min, uint64_t max) {
+  std::string hint;
+  if (auto *sym = r.referent.dyn_cast<Symbol *>())
+    hint = "; references " + toString(*sym);
+  // TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
+  error("relocation " + target->getRelocAttrs(r.type).name +
+        " is out of range: " + v + " is not in [" + Twine(min) + ", " +
+        Twine(max) + "]" + hint);
+}
+
+void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
+                             int64_t min, uint64_t max) {
+  std::string hint;
+  if (d.symbol)
+    hint = "; references " + toString(*d.symbol);
+  error(d.reason + " is out of range: " + v + " is not in [" + Twine(min) +
+        ", " + Twine(max) + "]" + hint);
+}
+
 const RelocAttrs macho::invalidRelocAttrs{"INVALID", RelocAttrBits::_0};

diff  --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 21a27aba14ac..f717c462d722 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -59,13 +59,41 @@ struct Reloc {
   uint32_t offset = 0;
   // Adding this offset to the address of the referent symbol or subsection
   // gives the destination that this relocation refers to.
-  uint64_t addend = 0;
+  int64_t addend = 0;
   llvm::PointerUnion<Symbol *, InputSection *> referent = nullptr;
 };
 
 bool validateSymbolRelocation(const Symbol *, const InputSection *,
                               const Reloc &);
 
+/*
+ * v: The value the relocation is attempting to encode
+ * bits: The number of bits actually available to encode this relocation
+ */
+void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
+                      int64_t min, uint64_t max);
+
+struct SymbolDiagnostic {
+  const Symbol *symbol;
+  llvm::StringRef reason;
+};
+
+void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
+                      int64_t min, uint64_t max);
+
+template <typename Diagnostic>
+inline void checkInt(Diagnostic d, int64_t v, int bits) {
+  if (v != llvm::SignExtend64(v, bits))
+    reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
+                     llvm::maxIntN(bits));
+}
+
+template <typename Diagnostic>
+inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
+  if ((v >> bits) != 0)
+    reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
+}
+
 extern const RelocAttrs invalidRelocAttrs;
 
 } // namespace macho

diff  --git a/lld/MachO/Target.h b/lld/MachO/Target.h
index 8d98a73a6cca..52a2c5b44376 100644
--- a/lld/MachO/Target.h
+++ b/lld/MachO/Target.h
@@ -39,7 +39,7 @@ class TargetInfo {
   virtual ~TargetInfo() = default;
 
   // Validate the relocation structure and get its addend.
-  virtual uint64_t
+  virtual int64_t
   getEmbeddedAddend(llvm::MemoryBufferRef, const llvm::MachO::section_64 &,
                     const llvm::MachO::relocation_info) const = 0;
   virtual void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,

diff  --git a/lld/test/MachO/invalid/range-check.s b/lld/test/MachO/invalid/range-check.s
new file mode 100644
index 000000000000..22778f427846
--- /dev/null
+++ b/lld/test/MachO/invalid/range-check.s
@@ -0,0 +1,32 @@
+# REQUIRES: x86
+
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
+# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
+# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s
+
+# CHECK: error: relocation UNSIGNED is out of range: 8589942792 is not in [0, 4294967295]; references _foo
+# CHECK: error: relocation GOT_LOAD is out of range: 4294974033 is not in [-2147483648, 2147483647]; references _foo
+# CHECK: error: stub is out of range: 4294974006 is not in [-2147483648, 2147483647]; references _bar
+# CHECK: error: stub helper header is out of range: 4294974005 is not in [-2147483648, 2147483647]
+# CHECK: error: stub helper header is out of range: 4294969893 is not in [-2147483648, 2147483647]
+
+#--- bar.s
+.globl _bar
+_bar:
+
+#--- test.s
+.globl _main, _foo
+
+_main:
+  movq _foo at GOTPCREL(%rip), %rax
+  callq _bar
+  ret
+
+.int _foo
+.zerofill __TEXT,bss,_zero,0xffffffff
+
+.data
+_foo:
+  .space 0


        


More information about the llvm-commits mailing list