[lld] a552fb2 - [lld-macho] Have relocation address included in range-check error message

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 28 18:57:06 PST 2022


Author: Jez Ng
Date: 2022-02-28T21:56:38-05:00
New Revision: a552fb2a86dba5e33bd94fbcddfd5597eb49e619

URL: https://github.com/llvm/llvm-project/commit/a552fb2a86dba5e33bd94fbcddfd5597eb49e619
DIFF: https://github.com/llvm/llvm-project/commit/a552fb2a86dba5e33bd94fbcddfd5597eb49e619.diff

LOG: [lld-macho] Have relocation address included in range-check error message

This makes it easier to debug those errors. See e.g. https://github.com/llvm/llvm-project/issues/52767#issuecomment-1028713943

We take the approach of 'reverse-engineering' the InputSection from the
output buffer offset. This provides for a cleaner Target API, and is
similar to LLD-ELF's implementation of getErrorPlace().

Reviewed By: #lld-macho, Roger

Differential Revision: https://reviews.llvm.org/D118903

Added: 
    

Modified: 
    lld/MachO/Arch/ARM64Common.cpp
    lld/MachO/Arch/ARM64Common.h
    lld/MachO/Arch/X86_64.cpp
    lld/MachO/Relocations.cpp
    lld/MachO/Relocations.h
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    lld/test/MachO/invalid/range-check.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Arch/ARM64Common.cpp b/lld/MachO/Arch/ARM64Common.cpp
index 67e7292fd6fdb..f55258ce8ec9f 100644
--- a/lld/MachO/Arch/ARM64Common.cpp
+++ b/lld/MachO/Arch/ARM64Common.cpp
@@ -38,56 +38,57 @@ int64_t ARM64Common::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
   }
 }
 
+static void writeValue(uint8_t *loc, const Reloc &r, uint64_t value) {
+  switch (r.length) {
+  case 2:
+    checkInt(loc, r, value, 32);
+    write32le(loc, value);
+    break;
+  case 3:
+    write64le(loc, value);
+    break;
+  default:
+    llvm_unreachable("invalid r_length");
+  }
+}
+
 // For instruction relocations (load, store, add), the base
 // instruction is pre-populated in the text section. A pre-populated
 // instruction has opcode & register-operand bits set, with immediate
 // operands zeroed. We read it from text, OR-in the immediate
 // operands, then write-back the completed instruction.
-
 void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
                               uint64_t pc) const {
+  auto loc32 = reinterpret_cast<uint32_t *>(loc);
   uint32_t base = ((r.length == 2) ? read32le(loc) : 0);
   switch (r.type) {
   case ARM64_RELOC_BRANCH26:
-    value = encodeBranch26(r, base, value - pc);
+    encodeBranch26(loc32, r, base, value - pc);
     break;
   case ARM64_RELOC_SUBTRACTOR:
   case ARM64_RELOC_UNSIGNED:
-    if (r.length == 2)
-      checkInt(r, value, 32);
+    writeValue(loc, r, value);
     break;
   case ARM64_RELOC_POINTER_TO_GOT:
     if (r.pcrel)
       value -= pc;
-    checkInt(r, value, 32);
+    writeValue(loc, r, value);
     break;
   case ARM64_RELOC_PAGE21:
   case ARM64_RELOC_GOT_LOAD_PAGE21:
-  case ARM64_RELOC_TLVP_LOAD_PAGE21: {
+  case ARM64_RELOC_TLVP_LOAD_PAGE21:
     assert(r.pcrel);
-    value = encodePage21(r, base, pageBits(value) - pageBits(pc));
+    encodePage21(loc32, r, base, pageBits(value) - pageBits(pc));
     break;
-  }
   case ARM64_RELOC_PAGEOFF12:
   case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
   case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
     assert(!r.pcrel);
-    value = encodePageOff12(base, value);
+    encodePageOff12(loc32, base, value);
     break;
   default:
     llvm_unreachable("unexpected relocation type");
   }
-
-  switch (r.length) {
-  case 2:
-    write32le(loc, value);
-    break;
-  case 3:
-    write64le(loc, value);
-    break;
-  default:
-    llvm_unreachable("invalid r_length");
-  }
 }
 
 void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const {

diff  --git a/lld/MachO/Arch/ARM64Common.h b/lld/MachO/Arch/ARM64Common.h
index 934101caefb99..5f7b017b2ef2a 100644
--- a/lld/MachO/Arch/ARM64Common.h
+++ b/lld/MachO/Arch/ARM64Common.h
@@ -40,16 +40,18 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
 // |           |                       imm26                       |
 // +-----------+---------------------------------------------------+
 
-inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) {
-  checkInt(r, va, 28);
+inline void encodeBranch26(uint32_t *loc, const Reloc &r, uint32_t base,
+                           uint64_t va) {
+  checkInt(loc, r, va, 28);
   // Since branch destinations are 4-byte aligned, the 2 least-
   // significant bits are 0. They are right shifted off the end.
-  return (base | bitField(va, 2, 26, 0));
+  llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
 }
 
-inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
-  checkInt(d, va, 28);
-  return (base | bitField(va, 2, 26, 0));
+inline void encodeBranch26(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
+                           uint64_t va) {
+  checkInt(loc, d, va, 28);
+  llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
 }
 
 //   30 29          23                                  5
@@ -57,14 +59,18 @@ inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
 // | |ilo|         |                immhi                |         |
 // +-+---+---------+-------------------------------------+---------+
 
-inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) {
-  checkInt(r, va, 35);
-  return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+inline void encodePage21(uint32_t *loc, const Reloc &r, uint32_t base,
+                         uint64_t va) {
+  checkInt(loc, r, va, 35);
+  llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
+                                            bitField(va, 14, 19, 5));
 }
 
-inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
-  checkInt(d, va, 35);
-  return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5));
+inline void encodePage21(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
+                         uint64_t va) {
+  checkInt(loc, d, va, 35);
+  llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
+                                            bitField(va, 14, 19, 5));
 }
 
 //                      21                   10
@@ -72,7 +78,7 @@ inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
 // |                   |         imm12         |                   |
 // +-------------------+-----------------------+-------------------+
 
-inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
+inline void encodePageOff12(uint32_t *loc, uint32_t base, uint64_t va) {
   int scale = 0;
   if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store
     scale = base >> 30;
@@ -82,7 +88,8 @@ inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
 
   // TODO(gkm): extract embedded addend and warn if != 0
   // uint64_t addend = ((base & 0x003FFC00) >> 10);
-  return (base | bitField(va, scale, 12 - scale, 10));
+  llvm::support::endian::write32le(loc,
+                                   base | bitField(va, scale, 12 - scale, 10));
 }
 
 inline uint64_t pageBits(uint64_t address) {
@@ -99,9 +106,9 @@ inline void writeStub(uint8_t *buf8, const uint32_t stubCode[3],
       pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize);
   uint64_t lazyPointerVA =
       in.lazyPointers->addr + sym.stubsIndex * LP::wordSize;
-  buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
-                          pageBits(lazyPointerVA) - pcPageBits);
-  buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
+  encodePage21(&buf32[0], {&sym, "stub"}, stubCode[0],
+               pageBits(lazyPointerVA) - pcPageBits);
+  encodePageOff12(&buf32[1], stubCode[1], lazyPointerVA);
   buf32[2] = stubCode[2];
 }
 
@@ -114,15 +121,15 @@ inline void writeStubHelperHeader(uint8_t *buf8,
   };
   uint64_t loaderVA = in.imageLoaderCache->getVA();
   SymbolDiagnostic d = {nullptr, "stub header helper"};
-  buf32[0] = encodePage21(d, stubHelperHeaderCode[0],
-                          pageBits(loaderVA) - pcPageBits(0));
-  buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
+  encodePage21(&buf32[0], d, stubHelperHeaderCode[0],
+               pageBits(loaderVA) - pcPageBits(0));
+  encodePageOff12(&buf32[1], stubHelperHeaderCode[1], loaderVA);
   buf32[2] = stubHelperHeaderCode[2];
   uint64_t binderVA =
       in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize;
-  buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
-                          pageBits(binderVA) - pcPageBits(3));
-  buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
+  encodePage21(&buf32[3], d, stubHelperHeaderCode[3],
+               pageBits(binderVA) - pcPageBits(3));
+  encodePageOff12(&buf32[4], stubHelperHeaderCode[4], binderVA);
   buf32[5] = stubHelperHeaderCode[5];
 }
 
@@ -133,8 +140,8 @@ inline void writeStubHelperEntry(uint8_t *buf8,
   auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
   uint64_t stubHelperHeaderVA = in.stubHelper->addr;
   buf32[0] = stubHelperEntryCode[0];
-  buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1],
-                            stubHelperHeaderVA - pcVA(1));
+  encodeBranch26(&buf32[1], {&sym, "stub helper"}, stubHelperEntryCode[1],
+                 stubHelperHeaderVA - pcVA(1));
   buf32[2] = sym.lazyBindOffset;
 }
 

diff  --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp
index 7e2155801fc2f..68360eedf4937 100644
--- a/lld/MachO/Arch/X86_64.cpp
+++ b/lld/MachO/Arch/X86_64.cpp
@@ -102,9 +102,9 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
   switch (r.length) {
   case 2:
     if (r.type == X86_64_RELOC_UNSIGNED)
-      checkUInt(r, value, 32);
+      checkUInt(loc, r, value, 32);
     else
-      checkInt(r, value, 32);
+      checkInt(loc, r, value, 32);
     write32le(loc, value);
     break;
   case 3:
@@ -127,7 +127,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
 static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
                              uint64_t bufOff, uint64_t destAddr) {
   uint64_t rip = bufAddr + bufOff;
-  checkInt(d, destAddr - rip, 32);
+  checkInt(buf, d, destAddr - rip, 32);
   // For the instructions we care about, the RIP-relative address is always
   // stored in the last 4 bytes of the instruction.
   write32le(buf + bufOff - 4, destAddr - rip);

diff  --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index a711157679c14..6f4b00c7acf91 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Relocations.h"
+#include "ConcatOutputSection.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -38,19 +39,65 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
   return valid;
 }
 
-void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits,
-                             int64_t min, uint64_t max) {
+// Given an offset in the output buffer, figure out which ConcatInputSection (if
+// any) maps to it. At the same time, update the offset such that it is relative
+// to the InputSection rather than to the output buffer.
+//
+// Obtaining the InputSection allows us to have better error diagnostics.
+// However, many of our relocation-handling methods do not take the InputSection
+// as a parameter. Since we are already passing the buffer offsets to our Target
+// methods, this function allows us to emit better errors without threading an
+// additional InputSection argument through the call stack.
+//
+// This is implemented as a slow linear search through OutputSegments,
+// OutputSections, and finally the InputSections themselves. However, this
+// function should be called only on error paths, so some overhead is fine.
+static InputSection *offsetToInputSection(uint64_t *off) {
+  for (OutputSegment *seg : outputSegments) {
+    if (*off < seg->fileOff || *off >= seg->fileOff + seg->fileSize)
+      continue;
+
+    const std::vector<OutputSection *> &sections = seg->getSections();
+    size_t osecIdx = 0;
+    for (; osecIdx < sections.size(); ++osecIdx)
+      if (*off < sections[osecIdx]->fileOff)
+        break;
+    assert(osecIdx > 0);
+    // We should be only calling this function on offsets that belong to
+    // ConcatOutputSections.
+    auto *osec = cast<ConcatOutputSection>(sections[osecIdx - 1]);
+    *off -= osec->fileOff;
+
+    size_t isecIdx = 0;
+    for (; isecIdx < osec->inputs.size(); ++isecIdx) {
+      const ConcatInputSection *isec = osec->inputs[isecIdx];
+      if (*off < isec->outSecOff)
+        break;
+    }
+    assert(isecIdx > 0);
+    ConcatInputSection *isec = osec->inputs[isecIdx - 1];
+    *off -= isec->outSecOff;
+    return isec;
+  }
+  return nullptr;
+}
+
+void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v,
+                             uint8_t bits, int64_t min, uint64_t max) {
   std::string hint;
+  uint64_t off = reinterpret_cast<const uint8_t *>(loc) - in.bufferStart;
+  const InputSection *isec = offsetToInputSection(&off);
+  std::string locStr = isec ? isec->getLocation(off) : "(invalid location)";
   if (auto *sym = r.referent.dyn_cast<Symbol *>())
     hint = "; references " + toString(*sym);
-  // TODO: get location of reloc using something like LLD-ELF's getErrorPlace()
-  error("relocation " + target->getRelocAttrs(r.type).name +
+  error(locStr + ": relocation " + target->getRelocAttrs(r.type).name +
         " is out of range: " + v + " is not in [" + Twine(min) + ", " +
         Twine(max) + "]" + hint);
 }
 
-void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits,
-                             int64_t min, uint64_t max) {
+void macho::reportRangeError(void *loc, SymbolDiagnostic d, const Twine &v,
+                             uint8_t bits, int64_t min, uint64_t max) {
+  // FIXME: should we use `loc` somehow to provide a better error message?
   std::string hint;
   if (d.symbol)
     hint = "; references " + toString(*d.symbol);

diff  --git a/lld/MachO/Relocations.h b/lld/MachO/Relocations.h
index 9457465ac203d..b82ca4ebebccf 100644
--- a/lld/MachO/Relocations.h
+++ b/lld/MachO/Relocations.h
@@ -70,28 +70,28 @@ bool validateSymbolRelocation(const Symbol *, const InputSection *,
  * v: The value the relocation is attempting to encode
  * bits: The number of bits actually available to encode this relocation
  */
-void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits,
-                      int64_t min, uint64_t max);
+void reportRangeError(void *loc, const Reloc &, const llvm::Twine &v,
+                      uint8_t bits, int64_t min, uint64_t max);
 
 struct SymbolDiagnostic {
   const Symbol *symbol;
   llvm::StringRef reason;
 };
 
-void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits,
-                      int64_t min, uint64_t max);
+void reportRangeError(void *loc, SymbolDiagnostic, const llvm::Twine &v,
+                      uint8_t bits, int64_t min, uint64_t max);
 
 template <typename Diagnostic>
-inline void checkInt(Diagnostic d, int64_t v, int bits) {
+inline void checkInt(void *loc, Diagnostic d, int64_t v, int bits) {
   if (v != llvm::SignExtend64(v, bits))
-    reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits),
+    reportRangeError(loc, d, llvm::Twine(v), bits, llvm::minIntN(bits),
                      llvm::maxIntN(bits));
 }
 
 template <typename Diagnostic>
-inline void checkUInt(Diagnostic d, uint64_t v, int bits) {
+inline void checkUInt(void *loc, Diagnostic d, uint64_t v, int bits) {
   if ((v >> bits) != 0)
-    reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
+    reportRangeError(loc, d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
 }
 
 inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) {

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index b871477331c8d..58faa87e3059f 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -595,6 +595,7 @@ class WordLiteralSection final : public SyntheticSection {
 };
 
 struct InStruct {
+  const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
   CStringSection *cStringSection = nullptr;
   WordLiteralSection *wordLiteralSection = nullptr;

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 6aaefaae8cc4a..56a26f11e63cf 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1048,10 +1048,10 @@ void Writer::openFile() {
                                FileOutputBuffer::F_executable);
 
   if (!bufferOrErr)
-    error("failed to open " + config->outputFile + ": " +
+    fatal("failed to open " + config->outputFile + ": " +
           llvm::toString(bufferOrErr.takeError()));
-  else
-    buffer = std::move(*bufferOrErr);
+  buffer = std::move(*bufferOrErr);
+  in.bufferStart = buffer->getBufferStart();
 }
 
 void Writer::writeSections() {

diff  --git a/lld/test/MachO/invalid/range-check.s b/lld/test/MachO/invalid/range-check.s
index c25d367dd1184..2f087e38c2662 100644
--- a/lld/test/MachO/invalid/range-check.s
+++ b/lld/test/MachO/invalid/range-check.s
@@ -6,8 +6,8 @@
 # RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
 # RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s
 
-# CHECK: error: relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
-# CHECK: error: relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
+# CHECK: error: {{.*}}test.o:(symbol _main+0xd): relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
+# CHECK: error: {{.*}}test.o:(symbol _main+0x3): relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
 # CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar
 # CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
 # CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]


        


More information about the llvm-commits mailing list